# Make sure that the Source option is set to "Chunk Output in Console" and do not select "Show Previews Inline"
library(ape) #read.tree()
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.4
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.4-5
library(beanplot)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
library(iNEXT)
library(iNextPD)
##
## Attaching package: 'iNextPD'
## The following object is masked from 'package:iNEXT':
##
## ggiNEXT
library(ade4)
library(boral)
## Loading required package: coda
## If you recently updated boral, please check news(package = "boral") for the updates in the latest version.
library(mvabund)
library(RColorBrewer)
library(betapart)
library(forcats)
library(stringr)
library(SpeciesMix)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: numDeriv
library(beepr)
library(corrplot)
## corrplot 0.84 loaded
library(lulu)
# Record R and package versions for reproducibility (output kept below).
sessionInfo()
## R version 3.3.3 (2017-03-06)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS 10.13.6
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] lulu_0.1.0 corrplot_0.84 beepr_1.3
## [4] SpeciesMix_0.3.4 numDeriv_2016.8-1 MASS_7.3-50
## [7] betapart_1.5.0 RColorBrewer_1.1-2 mvabund_3.12.3
## [10] boral_1.6.1 coda_0.19-1 ade4_1.7-10
## [13] iNextPD_0.3.2 iNEXT_2.0.12 car_3.0-0
## [16] carData_3.0-1 beanplot_1.2 vegan_2.4-5
## [19] lattice_0.20-35 permute_0.9-4 forcats_0.3.0
## [22] stringr_1.3.1 dplyr_0.7.4 purrr_0.2.5
## [25] readr_1.1.1 tidyr_0.8.1 tibble_1.4.2
## [28] ggplot2_2.2.1 tidyverse_1.2.1 ape_5.0
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-131 lubridate_1.7.4 httr_1.3.1
## [4] rprojroot_1.3-2 tools_3.3.3 backports_1.1.2
## [7] R6_2.2.2 R2WinBUGS_2.1-21 lazyeval_0.2.1
## [10] mgcv_1.8-22 colorspace_1.3-2 mnormt_1.5-5
## [13] curl_3.2 cli_1.0.0 rvest_0.3.2
## [16] xml2_1.2.0 scales_0.5.0 mvtnorm_1.0-6
## [19] psych_1.8.4 digest_0.6.15 foreign_0.8-70
## [22] rmarkdown_1.9 rio_0.5.10 pkgconfig_2.0.1
## [25] htmltools_0.3.6 rlang_0.2.1 readxl_1.1.0
## [28] rstudioapi_0.7 bindr_0.1 jsonlite_1.5
## [31] zip_1.0.0 magrittr_1.5 Matrix_1.2-12
## [34] Rcpp_0.12.17 munsell_0.4.3 abind_1.4-5
## [37] stringi_1.2.2 yaml_2.1.19 plyr_1.8.4
## [40] fishMod_0.29 grid_3.3.3 parallel_3.3.3
## [43] crayon_1.3.4 haven_1.1.1 hms_0.4.2
## [46] knitr_1.20 rcdd_1.2 pillar_1.2.3
## [49] boot_1.3-20 reshape2_1.4.3 magic_1.5-8
## [52] R2jags_0.5-7 picante_1.7 glue_1.2.0
## [55] evaluate_0.10.1 data.table_1.11.4 modelr_0.1.2
## [58] cellranger_1.1.0 gtable_0.2.0 assertthat_0.2.0
## [61] openxlsx_4.1.0 tweedie_2.3.2 broom_0.4.4
## [64] rjags_4-6 audio_0.1-5 geometry_0.3-6
## [67] bindrcpp_0.2 cluster_2.0.6 statmod_1.4.30
# Shell commands used to generate the lulu match list (run in a terminal, not in R).
# Note: the pasted command had en-dashes; vsearch options take double hyphens.
# cd Documents/kiz/ecec/2014-GFGP/MBC-miseq/Eco_analysis/2014MBC_CROP97_3507otus/1lulu_3507
# vsearch --usearch_global 2libs_CROP97.cluster3507.fasta --db 2libs_CROP97.cluster3507.fasta --self --id .84 --iddef 1 --userout match_list.txt --userfields query+target+id --maxaccepts 0 --query_cov .9 --maxhits 10
# Input: post-phyloseq-filtered OTU table (min 20 reads) with Landsat
# environmental covariates appended; produced with the final bioinformatics
# pipeline (vsearch + RDP Classifier + phyloseq).
inputfile <- "./data/2014MBC_44reads_otu543_LandsatEnv.txt"
# read_tsv() (readr) with explicit column formats so factor levels and the
# date format are fixed up-front rather than guessed from the data.
gfgMB <- read_tsv(
inputfile, col_names = TRUE, na = "NA",
col_types = cols(
Site = col_character(),
Habitat = col_factor(c("BB", "CL", "EC", "JC", "MF", "NF")),
Type = col_factor(c("1", "2", "3", "4", "5", "6")),
Altitude = col_integer(),
sampling_time = col_date(format = "%d/%m/%Y"),
weather_value = col_factor(c("cloudy", "sunny", "rainy")),
Landsat_value = col_factor(c("1", "2", "3", "4", "5", "6")),
longitude = col_double(),
latitude = col_double(),
Elevation_m = col_integer()
)
)
# read_tsv() already returns a tibble; as_tibble() is a harmless safeguard
# and replaces the deprecated tbl_df().
gfgMB <- as_tibble(gfgMB)
# with(gfgMB, plot(Altitude ~ Elevation_m, col=as.numeric(Habitat)))
#   Altitude is unreliable; use Elevation_m (calculated from a DEM) instead.
# --- Environment dataset ------------------------------------------------
habitat <- gfgMB %>% dplyr::select(Site:Simpson_evenness_index)
habitat <- habitat %>% dplyr::filter(!is.na(Habitat)) # remove taxonomy rows
# Column names must start with a letter, so prefix numeric names with "x".
colnames(habitat)[11:80] <- paste0("x", colnames(habitat)[11:80])
# --- Community (OTU) dataset --------------------------------------------
community <- gfgMB %>% dplyr::select(starts_with("Cluster"))
# otuvector <- colnames(community)
community_t <- t(community) # OTUs as rows, samples as columns (matrix)
community_t <- as.data.frame(community_t)
community_t <- rownames_to_column(community_t)
colnames(community_t) <- c("otu", gfgMB$Site) # add column names
#communityAll_t <- community_t %>% dplyr::filter(phylum=="Arthropoda")
#communityAll_t <- community_t %>% dplyr::select(-c(kingdom:species))
communityAll <- t(community_t) # back to samples x OTUs (character matrix)
colvector <- communityAll[1, ] # first row holds the OTU names
communityAll <- as.data.frame(communityAll)
colnames(communityAll) <- colvector # add the otu names to the column names
communityAll <- communityAll[-1, ] # drop the row that held the names
# The double transpose left every column as a factor; convert each column
# factor -> character -> numeric (factors cannot be converted directly).
# lapply() is type-stable here, unlike sapply(), whose return type depends
# on the input and silently changes on empty/ragged data.
# http://stackoverflow.com/questions/2288485/how-to-convert-a-data-frame-column-to-numeric-type
communityAll <- lapply(communityAll, function(x) as.numeric(as.character(x)))
communityAll <- as.data.frame(communityAll) # back to a data frame
##### calculate distribution of read numbers per OTU to set minimum number
otureads <- c(colSums(communityAll)) # list of the reads per OTU
sum(otureads) # total reads; NOTE(review): output below shows 1,840,033 — an earlier note said 1,900,539; re-check after any pipeline change
## [1] 1840033
otureads[otureads>5000] <- 5000 # to make the histogram readable
otuhist <- hist(otureads, breaks = 100)
# Label each histogram bar with its count, just above the bar.
text(otuhist$mids, otuhist$counts, cex = 0.5, otuhist$counts, adj = c(.5, -.5), col = "blue3")
# Filter out sites with (1) low reads (<= 100), (2) very low numbers of species
# Work on a copy so communityAll is preserved unfiltered.
community <- communityAll
#community <- t(new_table_lulu)
community <- as.data.frame(community)
#sort(colSums(community))
community[community < 5] <- 0 # set small cells to 0.
# Per-site read totals and species richness are stored on the env table so
# the same row filters can be applied to both tables in lock-step.
habitat$rowsums <- rowSums(community)
habitat$sprichness <- specnumber(community, MARGIN = 1) # number of species per site
# keep only sites that have more than 100 reads (removed 2, still remain 68)
community <- community %>% dplyr::filter(habitat$rowsums > 100)
habitatN <- habitat %>% dplyr::filter(habitat$rowsums > 100)
rowSums(community)
## [1] 12659 33270 36571 17609 10507 2171 32120 14412 28118 12189
## [11] 43303 40239 33982 36947 11959 28858 5511 18808 21798 7691
## [21] 1800 55657 24493 6154 13830 37230 34459 2408 25348 29522
## [31] 41848 40834 22431 46323 49219 20747 47123 69667 13828 10068
## [41] 35544 29235 14434 12513 38770 19366 15117 882 27033 6975
## [51] 22962 6660 1042 28549 1997 20437 37110 39174 57522 93620
## [61] 21563 665 31150 33367 24136 26764 100009 51054
# keep only sites that have >=5 species (removed 68 - 61 = 7 sites)
# NOTE: sprichness was computed before the read filter but habitatN was
# subset in step with community, so rows still align one-to-one.
community <- community %>% dplyr::filter(habitatN$sprichness >= 5)
habitatN <- habitatN %>% dplyr::filter(habitatN$sprichness >= 5)
#sort(colSums(community))
habitatN <- droplevels(habitatN) # drop factor levels emptied by filtering
# Keep only OTUs with at least 20 reads across the remaining sites.
community <- community[, colSums(community)>=20]
#Cluster31064 Cluster334816 Cluster672922 Cluster932857 Cluster989447 Cluster273565 Cluster1265861
# Per-habitat distribution of site read totals.
beanplot(rowSums(community)~habitatN$Habitat, col = c("grey", "white"), xlab = "Habitat type", ylab = "Reads number")
# Kampstra, P. Beanplot: A Boxplot Alternative for Visual Comparison of Distributions. Journal of Statistical Software, Code Snippets 28(1). 1-9 (2008)
nboot <- 999 # set to 999 for publication
# base model with no levels combined
# Multivariate GLM (mvabund) of per-site total reads on habitat type.
reads.glm <- manyglm(rowSums(community) ~ habitatN$Habitat)
plot(reads.glm) # residual diagnostics
anova(reads.glm, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot) # p = 0.041, df = 5, score = 8.671, nBoot = 999, ~1 min. NOTE(review): p < 0.05, so read totals DO differ among habitats at the 0.05 level — the earlier "not significantly different" note contradicted the output below
## Resampling begins for test 1.
## Resampling run 0 finished. Time elapsed: 0.00 minutes...
## Resampling run 100 finished. Time elapsed: 0.05 minutes...
## Resampling run 200 finished. Time elapsed: 0.09 minutes...
## Resampling run 300 finished. Time elapsed: 0.14 minutes...
## Resampling run 400 finished. Time elapsed: 0.18 minutes...
## Resampling run 500 finished. Time elapsed: 0.23 minutes...
## Resampling run 600 finished. Time elapsed: 0.27 minutes...
## Resampling run 700 finished. Time elapsed: 0.32 minutes...
## Resampling run 800 finished. Time elapsed: 0.36 minutes...
## Resampling run 900 finished. Time elapsed: 0.41 minutes...
## Time elapsed: 0 hr 0 min 27 sec
## Analysis of Variance Table
##
## Model: manyglm(formula = rowSums(community) ~ habitatN$Habitat)
##
## Multivariate test:
## Res.Df Df.diff score Pr(>score)
## (Intercept) 60
## habitatN$Habitat 55 5 8.671 0.041 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments: P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
#### for otus taxa tree
#install.packages("metacoder")
library(metacoder)
## Loading required package: taxa
##
## Attaching package: 'taxa'
## The following object is masked from 'package:ggplot2':
##
## map_data
## This is metacoder verison 0.2.1 (stable). If you use metacoder for published research, please cite our paper:
##
## Foster Z, Sharpton T and Grunwald N (2017). "Metacoder: An R package for visualization and manipulation of community taxonomic diversity data." PLOS Computational Biology, 13(2), pp. 1-15. doi: 10.1371/journal.pcbi.1005404
##
## Enter `citation("metacoder")` for a BibTeX entry for this citation.
##
## Attaching package: 'metacoder'
## The following object is masked from 'package:ape':
##
## complement
library(tibble)
# OTU-by-sample table with a "lineage" taxonomy string per OTU, plus the
# matching sample metadata table.
mbc_otus <- read.table("data/forMetacoder/2014MBC_otu536_tax.txt", header = T, sep = "\t")
mbc_otus[mbc_otus>1] <- 1 # using presence/absence data; NOTE(review): assumes only numeric count cells are flagged by the comparison — confirm the OTU_id/lineage columns come through untouched
mbc_samples <- read.table("data/forMetacoder/2014MBC_sample536.txt", header = T, sep = "\t")
#str(mbc_otus)
#str(mbc_samples)
## change data type (read.table made these factors)
mbc_otus$OTU_id <- as.character(mbc_otus$OTU_id)
mbc_otus$lineage <- as.character(mbc_otus$lineage)
mbc_samples$Site <- as.character(mbc_samples$Site)
mbc_samples$Habitat <- as.character(mbc_samples$Habitat)
##
mbc_otus <- as_tibble(mbc_otus)
mbc_samples <- as_tibble(mbc_samples)
#print(mbc_otus)
#print(mbc_samples)
# Parse "r__root;p__Arthropoda;..." lineage strings into a taxonomic tree;
# the regex splits each element into a rank prefix and a taxon name.
obj <- parse_tax_data(mbc_otus, class_cols = "lineage", class_sep = ";",
class_key = c(tax_rank = "info", tax_name = "taxon_name"),
class_regex = "^(.+)__(.+)$")
# This returns a taxmap object. The taxmap class is designed to store any number of tables, lists, or vectors associated with taxonomic information and facilitate manipulating the data in a cohesive way. Here is what that object looks like:
print(obj) # or click on the object in the Environment pane
## <Taxmap>
## 144 taxa: ab. root, ac. Arthropoda ... fo. Paratalanta
## 144 edges: NA->ab, ab->ac, ac->ad ... cn->fm, au->fn, cc->fo
## 2 data sets:
## tax_data:
## # A tibble: 536 x 64
## taxon_id OTU_id lineage BB04 BB05 BB06 BB07 BB08
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ao Cluste… r__root;… 0 0 0 0 0
## 2 ag Cluste… r__root;… 0 0 0 0 0
## 3 ad Cluste… r__root;… 0 0 0 0 0
## # ... with 533 more rows, and 56 more variables:
## # BB09 <dbl>, BB10 <dbl>, CL01 <dbl>, CL03 <dbl>,
## # CL04 <dbl>, CL05 <dbl>, CL06 <dbl>, CL07 <dbl>,
## # CL08 <dbl>, CL09 <dbl>, CL10 <dbl>, CL11 <dbl>,
## # CL12 <dbl>, CL13 <dbl>, CL14 <dbl>, CL15 <dbl>,
## # CL16 <dbl>, EC01 <dbl>, EC05 <dbl>, EC06 <dbl>,
## # EC07 <dbl>, EC08 <dbl>, EC09 <dbl>, EC10 <dbl>,
## # JC01 <dbl>, JC02 <dbl>, JC03 <dbl>, JC05 <dbl>,
## # JC06 <dbl>, JC07 <dbl>, JC09 <dbl>, JC10 <dbl>,
## # JC11 <dbl>, JC12 <dbl>, MF01 <dbl>, MF02 <dbl>,
## # MF04 <dbl>, MF05 <dbl>, MF06 <dbl>, MF07 <dbl>,
## # MF08 <dbl>, MF09 <dbl>, MF10 <dbl>, NF02 <dbl>,
## # NF05 <dbl>, NF06 <dbl>, NF07 <dbl>, NF08 <dbl>,
## # NF09 <dbl>, NF10 <dbl>, NF11 <dbl>, NF12 <dbl>,
## # NF13 <dbl>, NF14 <dbl>, NF15 <dbl>, NF16 <dbl>
## class_data:
## # A tibble: 2,112 x 5
## taxon_id input_index tax_rank tax_name regex_match
## <chr> <int> <chr> <chr> <chr>
## 1 ab 1 r root r__root
## 2 ac 1 p Arthropoda p__Arthropoda
## 3 ad 1 c Insecta c__Insecta
## # ... with 2,109 more rows
## 0 functions:
# accounting for un-even sampling: convert raw counts to within-sample
# proportions so samples of different sequencing depth are comparable
obj$data$tax_data <- calc_obs_props(obj, "tax_data")
## No `cols` specified, so using all numeric columns:
## BB04, BB05, BB06, BB07, BB08 ... NF12, NF13, NF14, NF15, NF16
## Calculating proportions from counts for 61 columns for 536 observations.
print(obj)
## <Taxmap>
## 144 taxa: ab. root, ac. Arthropoda ... fo. Paratalanta
## 144 edges: NA->ab, ab->ac, ac->ad ... cn->fm, au->fn, cc->fo
## 2 data sets:
## tax_data:
## # A tibble: 536 x 62
## taxon_id BB04 BB05 BB06 BB07 BB08 BB09 BB10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ao 0 0 0 0 0 0 0
## 2 ag 0 0 0 0 0 0 0.0476
## 3 ad 0 0 0 0 0 0 0
## # ... with 533 more rows, and 54 more variables:
## # CL01 <dbl>, CL03 <dbl>, CL04 <dbl>, CL05 <dbl>,
## # CL06 <dbl>, CL07 <dbl>, CL08 <dbl>, CL09 <dbl>,
## # CL10 <dbl>, CL11 <dbl>, CL12 <dbl>, CL13 <dbl>,
## # CL14 <dbl>, CL15 <dbl>, CL16 <dbl>, EC01 <dbl>,
## # EC05 <dbl>, EC06 <dbl>, EC07 <dbl>, EC08 <dbl>,
## # EC09 <dbl>, EC10 <dbl>, JC01 <dbl>, JC02 <dbl>,
## # JC03 <dbl>, JC05 <dbl>, JC06 <dbl>, JC07 <dbl>,
## # JC09 <dbl>, JC10 <dbl>, JC11 <dbl>, JC12 <dbl>,
## # MF01 <dbl>, MF02 <dbl>, MF04 <dbl>, MF05 <dbl>,
## # MF06 <dbl>, MF07 <dbl>, MF08 <dbl>, MF09 <dbl>,
## # MF10 <dbl>, NF02 <dbl>, NF05 <dbl>, NF06 <dbl>,
## # NF07 <dbl>, NF08 <dbl>, NF09 <dbl>, NF10 <dbl>,
## # NF11 <dbl>, NF12 <dbl>, NF13 <dbl>, NF14 <dbl>,
## # NF15 <dbl>, NF16 <dbl>
## class_data:
## # A tibble: 2,112 x 5
## taxon_id input_index tax_rank tax_name regex_match
## <chr> <int> <chr> <chr> <chr>
## 1 ab 1 r root r__root
## 2 ac 1 p Arthropoda p__Arthropoda
## 3 ad 1 c Insecta c__Insecta
## # ... with 2,109 more rows
## 0 functions:
# Getting per-taxon information
# Currently, we have values for the abundance of each OTU, not each taxon. To get information on the taxa, we can sum the abundance per-taxon like so:
# (sums each OTU's proportions up the taxonomy, one output row per taxon)
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data",
cols = mbc_samples$Site)
## Summing per-taxon counts from 61 columns for 144 taxa
print(obj)
## <Taxmap>
## 144 taxa: ab. root, ac. Arthropoda ... fo. Paratalanta
## 144 edges: NA->ab, ab->ac, ac->ad ... cn->fm, au->fn, cc->fo
## 3 data sets:
## tax_data:
## # A tibble: 536 x 62
## taxon_id BB04 BB05 BB06 BB07 BB08 BB09 BB10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ao 0 0 0 0 0 0 0
## 2 ag 0 0 0 0 0 0 0.0476
## 3 ad 0 0 0 0 0 0 0
## # ... with 533 more rows, and 54 more variables:
## # CL01 <dbl>, CL03 <dbl>, CL04 <dbl>, CL05 <dbl>,
## # CL06 <dbl>, CL07 <dbl>, CL08 <dbl>, CL09 <dbl>,
## # CL10 <dbl>, CL11 <dbl>, CL12 <dbl>, CL13 <dbl>,
## # CL14 <dbl>, CL15 <dbl>, CL16 <dbl>, EC01 <dbl>,
## # EC05 <dbl>, EC06 <dbl>, EC07 <dbl>, EC08 <dbl>,
## # EC09 <dbl>, EC10 <dbl>, JC01 <dbl>, JC02 <dbl>,
## # JC03 <dbl>, JC05 <dbl>, JC06 <dbl>, JC07 <dbl>,
## # JC09 <dbl>, JC10 <dbl>, JC11 <dbl>, JC12 <dbl>,
## # MF01 <dbl>, MF02 <dbl>, MF04 <dbl>, MF05 <dbl>,
## # MF06 <dbl>, MF07 <dbl>, MF08 <dbl>, MF09 <dbl>,
## # MF10 <dbl>, NF02 <dbl>, NF05 <dbl>, NF06 <dbl>,
## # NF07 <dbl>, NF08 <dbl>, NF09 <dbl>, NF10 <dbl>,
## # NF11 <dbl>, NF12 <dbl>, NF13 <dbl>, NF14 <dbl>,
## # NF15 <dbl>, NF16 <dbl>
## class_data:
## # A tibble: 2,112 x 5
## taxon_id input_index tax_rank tax_name regex_match
## <chr> <int> <chr> <chr> <chr>
## 1 ab 1 r root r__root
## 2 ac 1 p Arthropoda p__Arthropoda
## 3 ad 1 c Insecta c__Insecta
## # ... with 2,109 more rows
## tax_abund:
## # A tibble: 144 x 62
## taxon_id BB04 BB05 BB06 BB07 BB08 BB09 BB10 CL01
## * <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ab 1 1 1 1 1 1 1 1
## 2 ac 1 1 1 1 1 1 1 1
## 3 ad 0.955 0.812 0.864 0.944 0.571 0.808 0.905 1
## # ... with 141 more rows, and 53 more variables:
## # CL03 <dbl>, CL04 <dbl>, CL05 <dbl>, CL06 <dbl>,
## # CL07 <dbl>, CL08 <dbl>, CL09 <dbl>, CL10 <dbl>,
## # CL11 <dbl>, CL12 <dbl>, CL13 <dbl>, CL14 <dbl>,
## # CL15 <dbl>, CL16 <dbl>, EC01 <dbl>, EC05 <dbl>,
## # EC06 <dbl>, EC07 <dbl>, EC08 <dbl>, EC09 <dbl>,
## # EC10 <dbl>, JC01 <dbl>, JC02 <dbl>, JC03 <dbl>,
## # JC05 <dbl>, JC06 <dbl>, JC07 <dbl>, JC09 <dbl>,
## # JC10 <dbl>, JC11 <dbl>, JC12 <dbl>, MF01 <dbl>,
## # MF02 <dbl>, MF04 <dbl>, MF05 <dbl>, MF06 <dbl>,
## # MF07 <dbl>, MF08 <dbl>, MF09 <dbl>, MF10 <dbl>,
## # NF02 <dbl>, NF05 <dbl>, NF06 <dbl>, NF07 <dbl>,
## # NF08 <dbl>, NF09 <dbl>, NF10 <dbl>, NF11 <dbl>,
## # NF12 <dbl>, NF13 <dbl>, NF14 <dbl>, NF15 <dbl>,
## # NF16 <dbl>
## 0 functions:
# Note that there is now an additional table with one row per taxon.
# We can also easily calculate the number of samples have reads for each taxon:
# Occurrence counts per taxon, tallied within each Habitat group.
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups = mbc_samples$Habitat)
## No `cols` specified, so using all numeric columns:
## BB04, BB05, BB06, BB07, BB08 ... NF12, NF13, NF14, NF15, NF16
## Calculating number of samples with non-zero counts from 61 columns in 6 groups for 144 observations
print(obj)
## <Taxmap>
## 144 taxa: ab. root, ac. Arthropoda ... fo. Paratalanta
## 144 edges: NA->ab, ab->ac, ac->ad ... cn->fm, au->fn, cc->fo
## 4 data sets:
## tax_data:
## # A tibble: 536 x 62
## taxon_id BB04 BB05 BB06 BB07 BB08 BB09 BB10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ao 0 0 0 0 0 0 0
## 2 ag 0 0 0 0 0 0 0.0476
## 3 ad 0 0 0 0 0 0 0
## # ... with 533 more rows, and 54 more variables:
## # CL01 <dbl>, CL03 <dbl>, CL04 <dbl>, CL05 <dbl>,
## # CL06 <dbl>, CL07 <dbl>, CL08 <dbl>, CL09 <dbl>,
## # CL10 <dbl>, CL11 <dbl>, CL12 <dbl>, CL13 <dbl>,
## # CL14 <dbl>, CL15 <dbl>, CL16 <dbl>, EC01 <dbl>,
## # EC05 <dbl>, EC06 <dbl>, EC07 <dbl>, EC08 <dbl>,
## # EC09 <dbl>, EC10 <dbl>, JC01 <dbl>, JC02 <dbl>,
## # JC03 <dbl>, JC05 <dbl>, JC06 <dbl>, JC07 <dbl>,
## # JC09 <dbl>, JC10 <dbl>, JC11 <dbl>, JC12 <dbl>,
## # MF01 <dbl>, MF02 <dbl>, MF04 <dbl>, MF05 <dbl>,
## # MF06 <dbl>, MF07 <dbl>, MF08 <dbl>, MF09 <dbl>,
## # MF10 <dbl>, NF02 <dbl>, NF05 <dbl>, NF06 <dbl>,
## # NF07 <dbl>, NF08 <dbl>, NF09 <dbl>, NF10 <dbl>,
## # NF11 <dbl>, NF12 <dbl>, NF13 <dbl>, NF14 <dbl>,
## # NF15 <dbl>, NF16 <dbl>
## class_data:
## # A tibble: 2,112 x 5
## taxon_id input_index tax_rank tax_name regex_match
## <chr> <int> <chr> <chr> <chr>
## 1 ab 1 r root r__root
## 2 ac 1 p Arthropoda p__Arthropoda
## 3 ad 1 c Insecta c__Insecta
## # ... with 2,109 more rows
## tax_abund:
## # A tibble: 144 x 62
## taxon_id BB04 BB05 BB06 BB07 BB08 BB09 BB10 CL01
## * <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ab 1 1 1 1 1 1 1 1
## 2 ac 1 1 1 1 1 1 1 1
## 3 ad 0.955 0.812 0.864 0.944 0.571 0.808 0.905 1
## # ... with 141 more rows, and 53 more variables:
## # CL03 <dbl>, CL04 <dbl>, CL05 <dbl>, CL06 <dbl>,
## # CL07 <dbl>, CL08 <dbl>, CL09 <dbl>, CL10 <dbl>,
## # CL11 <dbl>, CL12 <dbl>, CL13 <dbl>, CL14 <dbl>,
## # CL15 <dbl>, CL16 <dbl>, EC01 <dbl>, EC05 <dbl>,
## # EC06 <dbl>, EC07 <dbl>, EC08 <dbl>, EC09 <dbl>,
## # EC10 <dbl>, JC01 <dbl>, JC02 <dbl>, JC03 <dbl>,
## # JC05 <dbl>, JC06 <dbl>, JC07 <dbl>, JC09 <dbl>,
## # JC10 <dbl>, JC11 <dbl>, JC12 <dbl>, MF01 <dbl>,
## # MF02 <dbl>, MF04 <dbl>, MF05 <dbl>, MF06 <dbl>,
## # MF07 <dbl>, MF08 <dbl>, MF09 <dbl>, MF10 <dbl>,
## # NF02 <dbl>, NF05 <dbl>, NF06 <dbl>, NF07 <dbl>,
## # NF08 <dbl>, NF09 <dbl>, NF10 <dbl>, NF11 <dbl>,
## # NF12 <dbl>, NF13 <dbl>, NF14 <dbl>, NF15 <dbl>,
## # NF16 <dbl>
## tax_occ:
## # A tibble: 144 x 7
## taxon_id BB CL EC JC MF NF
## <chr> <int> <int> <int> <int> <int> <int>
## 1 ab 7 15 7 10 9 13
## 2 ac 7 15 7 10 9 13
## 3 ad 7 15 7 10 9 13
## # ... with 141 more rows
## 0 functions:
# Plotting taxonomic data
# Now that we have per-taxon information, we can plot it with heat trees.
# The code below colors each taxon by the number of NF-habitat samples with
# reads (tax_occ$NF) and sizes nodes by the number of OTUs assigned to each
# taxon. (The original "Nose" wording was a leftover from the metacoder
# vignette this code was adapted from.)
heat_tree(obj,
node_label = obj$taxon_names(),
node_size = obj$n_obs(),
node_color = obj$data$tax_occ$NF,
node_size_axis_label = "OTU count",
node_color_axis_label = "Samples with reads")
# Comparing any number of treatments/groups
# Pairwise comparisons of per-taxon proportions between habitats; the
# output includes a wilcox_p_value column per taxon pair.
obj$data$diff_table <- compare_groups(obj, dataset = "tax_abund",
cols = mbc_samples$Site,
groups = mbc_samples$Habitat)
print(obj$data$diff_table)
## # A tibble: 2,160 x 7
## taxon_id treatment_1 treatment_2 log2_median_rat… median_diff mean_diff
## * <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 ab BB CL 0 0 0
## 2 ac BB CL 0 0 0
## 3 ad BB CL -0.153 -0.0964 -0.111
## 4 ae BB CL 0.856 0.0213 0.0710
## 5 af BB CL 0 0 -0.0141
## 6 ag BB CL 0.495 0.0264 0.0143
## 7 ah BB CL 0.856 0.0213 0.0710
## 8 ai BB CL -0.737 -0.182 -0.163
## 9 aj BB CL 0 0 0.0168
## 10 ak BB CL -Inf -0.0645 -0.0643
## # ... with 2,150 more rows, and 1 more variable: wilcox_p_value <dbl>
# Matrix of heat trees: one tree per pair of habitats, colored by the log2
# ratio of median proportions (diverging palette, clipped to [-3, 3]).
heat_tree_matrix(obj,
dataset = "diff_table",
node_size = n_obs,
node_label = taxon_names,
node_color = log2_median_ratio,
node_color_range = diverging_palette(),
node_color_trans = "linear",
node_color_interval = c(-3, 3),
edge_color_interval = c(-3, 3),
node_size_axis_label = "Number of OTUs",
node_color_axis_label = "Log2 ratio median proportions")
# page(compare_groups) #check codes of compare_groups, change "median" to "mean"
##### remove CL group to see difference in forest
# Drop the CL sample columns and the matching metadata rows so the
# comparison is restricted to the remaining five habitat groups.
otus_without_CL <- mbc_otus %>% dplyr::select(-c(CL01:CL16))
samples_without_CL <- mbc_samples %>% dplyr::filter(mbc_samples$Habitat %in% c("BB", "EC", "JC", "MF", "NF"))
print(otus_without_CL)
## # A tibble: 536 x 48
## OTU_id lineage BB04 BB05 BB06 BB07 BB08 BB09 BB10 EC01 EC05
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Cluste… r__root;… 0 0 0 0 0 0 0 0 1
## 2 Cluste… r__root;… 0 0 0 0 0 0 1 0 1
## 3 Cluste… r__root;… 0 0 0 0 0 0 0 0 0
## 4 Cluste… r__root;… 0 0 0 0 0 0 0 0 0
## 5 Cluste… r__root;… 0 0 0 0 0 1 0 0 0
## 6 Cluste… r__root;… 0 0 0 0 0 0 0 0 0
## 7 Cluste… r__root;… 0 0 0 0 0 0 0 0 0
## 8 Cluste… r__root;… 0 0 0 0 0 0 0 0 0
## 9 Cluste… r__root;… 0 0 0 0 0 0 0 0 0
## 10 Cluste… r__root;… 0 0 0 0 0 0 0 0 0
## # ... with 526 more rows, and 37 more variables: EC06 <dbl>, EC07 <dbl>,
## # EC08 <dbl>, EC09 <dbl>, EC10 <dbl>, JC01 <dbl>, JC02 <dbl>,
## # JC03 <dbl>, JC05 <dbl>, JC06 <dbl>, JC07 <dbl>, JC09 <dbl>,
## # JC10 <dbl>, JC11 <dbl>, JC12 <dbl>, MF01 <dbl>, MF02 <dbl>,
## # MF04 <dbl>, MF05 <dbl>, MF06 <dbl>, MF07 <dbl>, MF08 <dbl>,
## # MF09 <dbl>, MF10 <dbl>, NF02 <dbl>, NF05 <dbl>, NF06 <dbl>,
## # NF07 <dbl>, NF08 <dbl>, NF09 <dbl>, NF10 <dbl>, NF11 <dbl>,
## # NF12 <dbl>, NF13 <dbl>, NF14 <dbl>, NF15 <dbl>, NF16 <dbl>
# Sanity check: 46 samples remain after removing the CL group (see below).
print(samples_without_CL)
## # A tibble: 46 x 4
## Site Habitat Type Altitude
## <chr> <chr> <int> <int>
## 1 BB04 BB 1 900
## 2 BB05 BB 1 800
## 3 BB06 BB 1 850
## 4 BB07 BB 1 800
## 5 BB08 BB 1 750
## 6 BB09 BB 1 800
## 7 BB10 BB 1 800
## 8 EC01 EC 3 500
## 9 EC05 EC 3 600
## 10 EC06 EC 3 600
## # ... with 36 more rows
#str(otus_without_CL)
#str(samples_without_CL)
# Re-parse the taxonomy of the CL-free table into a fresh taxmap object.
objF <- parse_tax_data(otus_without_CL, class_cols = "lineage", class_sep = ";",
class_key = c(tax_rank = "info", tax_name = "taxon_name"),
class_regex = "^(.+)__(.+)$")
# This returns a taxmap object. The taxmap class is designed to store any number of tables, lists, or vectors associated with taxonomic information and facilitate manipulating the data in a cohesive way. Here is what that object looks like:
print(objF) # or click on the object in the Environment pane
## <Taxmap>
## 144 taxa: ab. root, ac. Arthropoda ... fo. Paratalanta
## 144 edges: NA->ab, ab->ac, ac->ad ... cn->fm, au->fn, cc->fo
## 2 data sets:
## tax_data:
## # A tibble: 536 x 49
## taxon_id OTU_id lineage BB04 BB05 BB06 BB07 BB08
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ao Cluste… r__root;… 0 0 0 0 0
## 2 ag Cluste… r__root;… 0 0 0 0 0
## 3 ad Cluste… r__root;… 0 0 0 0 0
## # ... with 533 more rows, and 41 more variables:
## # BB09 <dbl>, BB10 <dbl>, EC01 <dbl>, EC05 <dbl>,
## # EC06 <dbl>, EC07 <dbl>, EC08 <dbl>, EC09 <dbl>,
## # EC10 <dbl>, JC01 <dbl>, JC02 <dbl>, JC03 <dbl>,
## # JC05 <dbl>, JC06 <dbl>, JC07 <dbl>, JC09 <dbl>,
## # JC10 <dbl>, JC11 <dbl>, JC12 <dbl>, MF01 <dbl>,
## # MF02 <dbl>, MF04 <dbl>, MF05 <dbl>, MF06 <dbl>,
## # MF07 <dbl>, MF08 <dbl>, MF09 <dbl>, MF10 <dbl>,
## # NF02 <dbl>, NF05 <dbl>, NF06 <dbl>, NF07 <dbl>,
## # NF08 <dbl>, NF09 <dbl>, NF10 <dbl>, NF11 <dbl>,
## # NF12 <dbl>, NF13 <dbl>, NF14 <dbl>, NF15 <dbl>,
## # NF16 <dbl>
## class_data:
## # A tibble: 2,112 x 5
## taxon_id input_index tax_rank tax_name regex_match
## <chr> <int> <chr> <chr> <chr>
## 1 ab 1 r root r__root
## 2 ac 1 p Arthropoda p__Arthropoda
## 3 ad 1 c Insecta c__Insecta
## # ... with 2,109 more rows
## 0 functions:
# removing low abundance counts (kept disabled; data already filtered)
#objF$data$tax_data <- zero_low_counts(objF, "tax_data", min_count = 5)
#no_reads <- rowSums(objF$data$tax_data[, samples_without_CL$Site]) == 0
#sum(no_reads)
#objF <- filter_obs(objF, "tax_data", ! no_reads, drop_taxa = TRUE)
#print(objF)
# accounting for un-even sampling: counts -> within-sample proportions
objF$data$tax_data <- calc_obs_props(objF, "tax_data")
## No `cols` specified, so using all numeric columns:
## BB04, BB05, BB06, BB07, BB08 ... NF12, NF13, NF14, NF15, NF16
## Calculating proportions from counts for 46 columns for 536 observations.
print(objF)
## <Taxmap>
## 144 taxa: ab. root, ac. Arthropoda ... fo. Paratalanta
## 144 edges: NA->ab, ab->ac, ac->ad ... cn->fm, au->fn, cc->fo
## 2 data sets:
## tax_data:
## # A tibble: 536 x 47
## taxon_id BB04 BB05 BB06 BB07 BB08 BB09 BB10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ao 0 0 0 0 0 0 0
## 2 ag 0 0 0 0 0 0 0.0476
## 3 ad 0 0 0 0 0 0 0
## # ... with 533 more rows, and 39 more variables:
## # EC01 <dbl>, EC05 <dbl>, EC06 <dbl>, EC07 <dbl>,
## # EC08 <dbl>, EC09 <dbl>, EC10 <dbl>, JC01 <dbl>,
## # JC02 <dbl>, JC03 <dbl>, JC05 <dbl>, JC06 <dbl>,
## # JC07 <dbl>, JC09 <dbl>, JC10 <dbl>, JC11 <dbl>,
## # JC12 <dbl>, MF01 <dbl>, MF02 <dbl>, MF04 <dbl>,
## # MF05 <dbl>, MF06 <dbl>, MF07 <dbl>, MF08 <dbl>,
## # MF09 <dbl>, MF10 <dbl>, NF02 <dbl>, NF05 <dbl>,
## # NF06 <dbl>, NF07 <dbl>, NF08 <dbl>, NF09 <dbl>,
## # NF10 <dbl>, NF11 <dbl>, NF12 <dbl>, NF13 <dbl>,
## # NF14 <dbl>, NF15 <dbl>, NF16 <dbl>
## class_data:
## # A tibble: 2,112 x 5
## taxon_id input_index tax_rank tax_name regex_match
## <chr> <int> <chr> <chr> <chr>
## 1 ab 1 r root r__root
## 2 ac 1 p Arthropoda p__Arthropoda
## 3 ad 1 c Insecta c__Insecta
## # ... with 2,109 more rows
## 0 functions:
# Getting per-taxon information
# Currently, we have values for the abundance of each OTU, not each taxon. To get information on the taxa, we can sum the abundance per-taxon like so:
# (same per-taxon summation as before, now on the CL-free object)
objF$data$tax_abund <- calc_taxon_abund(objF, "tax_data",
cols = samples_without_CL$Site)
## Summing per-taxon counts from 46 columns for 144 taxa
print(objF)
## <Taxmap>
## 144 taxa: ab. root, ac. Arthropoda ... fo. Paratalanta
## 144 edges: NA->ab, ab->ac, ac->ad ... cn->fm, au->fn, cc->fo
## 3 data sets:
## tax_data:
## # A tibble: 536 x 47
## taxon_id BB04 BB05 BB06 BB07 BB08 BB09 BB10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ao 0 0 0 0 0 0 0
## 2 ag 0 0 0 0 0 0 0.0476
## 3 ad 0 0 0 0 0 0 0
## # ... with 533 more rows, and 39 more variables:
## # EC01 <dbl>, EC05 <dbl>, EC06 <dbl>, EC07 <dbl>,
## # EC08 <dbl>, EC09 <dbl>, EC10 <dbl>, JC01 <dbl>,
## # JC02 <dbl>, JC03 <dbl>, JC05 <dbl>, JC06 <dbl>,
## # JC07 <dbl>, JC09 <dbl>, JC10 <dbl>, JC11 <dbl>,
## # JC12 <dbl>, MF01 <dbl>, MF02 <dbl>, MF04 <dbl>,
## # MF05 <dbl>, MF06 <dbl>, MF07 <dbl>, MF08 <dbl>,
## # MF09 <dbl>, MF10 <dbl>, NF02 <dbl>, NF05 <dbl>,
## # NF06 <dbl>, NF07 <dbl>, NF08 <dbl>, NF09 <dbl>,
## # NF10 <dbl>, NF11 <dbl>, NF12 <dbl>, NF13 <dbl>,
## # NF14 <dbl>, NF15 <dbl>, NF16 <dbl>
## class_data:
## # A tibble: 2,112 x 5
## taxon_id input_index tax_rank tax_name regex_match
## <chr> <int> <chr> <chr> <chr>
## 1 ab 1 r root r__root
## 2 ac 1 p Arthropoda p__Arthropoda
## 3 ad 1 c Insecta c__Insecta
## # ... with 2,109 more rows
## tax_abund:
## # A tibble: 144 x 47
## taxon_id BB04 BB05 BB06 BB07 BB08 BB09 BB10 EC01
## * <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ab 1 1 1 1 1 1 1 1
## 2 ac 1 1 1 1 1 1 1 1
## 3 ad 0.955 0.812 0.864 0.944 0.571 0.808 0.905 1
## # ... with 141 more rows, and 38 more variables:
## # EC05 <dbl>, EC06 <dbl>, EC07 <dbl>, EC08 <dbl>,
## # EC09 <dbl>, EC10 <dbl>, JC01 <dbl>, JC02 <dbl>,
## # JC03 <dbl>, JC05 <dbl>, JC06 <dbl>, JC07 <dbl>,
## # JC09 <dbl>, JC10 <dbl>, JC11 <dbl>, JC12 <dbl>,
## # MF01 <dbl>, MF02 <dbl>, MF04 <dbl>, MF05 <dbl>,
## # MF06 <dbl>, MF07 <dbl>, MF08 <dbl>, MF09 <dbl>,
## # MF10 <dbl>, NF02 <dbl>, NF05 <dbl>, NF06 <dbl>,
## # NF07 <dbl>, NF08 <dbl>, NF09 <dbl>, NF10 <dbl>,
## # NF11 <dbl>, NF12 <dbl>, NF13 <dbl>, NF14 <dbl>,
## # NF15 <dbl>, NF16 <dbl>
## 0 functions:
# Note that there is now an additional table with one row per taxon.
# We can also easily calculate the number of samples have reads for each taxon:
# Occurrence counts per taxon within each of the five remaining habitats.
objF$data$tax_occ <- calc_n_samples(objF, "tax_abund", groups = samples_without_CL$Habitat)
## No `cols` specified, so using all numeric columns:
## BB04, BB05, BB06, BB07, BB08 ... NF12, NF13, NF14, NF15, NF16
## Calculating number of samples with non-zero counts from 46 columns in 5 groups for 144 observations
print(objF)
## <Taxmap>
## 144 taxa: ab. root, ac. Arthropoda ... fo. Paratalanta
## 144 edges: NA->ab, ab->ac, ac->ad ... cn->fm, au->fn, cc->fo
## 4 data sets:
## tax_data:
## # A tibble: 536 x 47
## taxon_id BB04 BB05 BB06 BB07 BB08 BB09 BB10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ao 0 0 0 0 0 0 0
## 2 ag 0 0 0 0 0 0 0.0476
## 3 ad 0 0 0 0 0 0 0
## # ... with 533 more rows, and 39 more variables:
## # EC01 <dbl>, EC05 <dbl>, EC06 <dbl>, EC07 <dbl>,
## # EC08 <dbl>, EC09 <dbl>, EC10 <dbl>, JC01 <dbl>,
## # JC02 <dbl>, JC03 <dbl>, JC05 <dbl>, JC06 <dbl>,
## # JC07 <dbl>, JC09 <dbl>, JC10 <dbl>, JC11 <dbl>,
## # JC12 <dbl>, MF01 <dbl>, MF02 <dbl>, MF04 <dbl>,
## # MF05 <dbl>, MF06 <dbl>, MF07 <dbl>, MF08 <dbl>,
## # MF09 <dbl>, MF10 <dbl>, NF02 <dbl>, NF05 <dbl>,
## # NF06 <dbl>, NF07 <dbl>, NF08 <dbl>, NF09 <dbl>,
## # NF10 <dbl>, NF11 <dbl>, NF12 <dbl>, NF13 <dbl>,
## # NF14 <dbl>, NF15 <dbl>, NF16 <dbl>
## class_data:
## # A tibble: 2,112 x 5
## taxon_id input_index tax_rank tax_name regex_match
## <chr> <int> <chr> <chr> <chr>
## 1 ab 1 r root r__root
## 2 ac 1 p Arthropoda p__Arthropoda
## 3 ad 1 c Insecta c__Insecta
## # ... with 2,109 more rows
## tax_abund:
## # A tibble: 144 x 47
## taxon_id BB04 BB05 BB06 BB07 BB08 BB09 BB10 EC01
## * <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ab 1 1 1 1 1 1 1 1
## 2 ac 1 1 1 1 1 1 1 1
## 3 ad 0.955 0.812 0.864 0.944 0.571 0.808 0.905 1
## # ... with 141 more rows, and 38 more variables:
## # EC05 <dbl>, EC06 <dbl>, EC07 <dbl>, EC08 <dbl>,
## # EC09 <dbl>, EC10 <dbl>, JC01 <dbl>, JC02 <dbl>,
## # JC03 <dbl>, JC05 <dbl>, JC06 <dbl>, JC07 <dbl>,
## # JC09 <dbl>, JC10 <dbl>, JC11 <dbl>, JC12 <dbl>,
## # MF01 <dbl>, MF02 <dbl>, MF04 <dbl>, MF05 <dbl>,
## # MF06 <dbl>, MF07 <dbl>, MF08 <dbl>, MF09 <dbl>,
## # MF10 <dbl>, NF02 <dbl>, NF05 <dbl>, NF06 <dbl>,
## # NF07 <dbl>, NF08 <dbl>, NF09 <dbl>, NF10 <dbl>,
## # NF11 <dbl>, NF12 <dbl>, NF13 <dbl>, NF14 <dbl>,
## # NF15 <dbl>, NF16 <dbl>
## tax_occ:
## # A tibble: 144 x 6
## taxon_id BB EC JC MF NF
## <chr> <int> <int> <int> <int> <int>
## 1 ab 7 7 10 9 13
## 2 ac 7 7 10 9 13
## 3 ad 7 7 10 9 13
## # ... with 141 more rows
## 0 functions:
# Plotting taxonomic data
# Now that we have per-taxon information, we can plot it with heat trees.
# As before, nodes are colored by the number of NF-habitat samples with
# reads (tax_occ$NF) and sized by OTUs per taxon. (The original "Nose"
# wording was a leftover from the metacoder vignette.)
heat_tree(objF,
node_label = objF$taxon_names(),
node_size = objF$n_obs(),
node_color = objF$data$tax_occ$NF,
node_size_axis_label = "OTU count",
node_color_axis_label = "Samples with reads")
# Comparing any number of treatments/groups
# Pairwise comparisons of per-taxon proportions between the five habitats.
objF$data$diff_table <- compare_groups(objF, dataset = "tax_abund",
cols = samples_without_CL$Site,
groups = samples_without_CL$Habitat)
print(objF$data$diff_table)
## # A tibble: 1,440 x 7
## taxon_id treatment_1 treatment_2 log2_median_rat… median_diff mean_diff
## * <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 ab BB EC 0 0 0
## 2 ac BB EC 0 0 0
## 3 ad BB EC -0.0860 -0.0530 -0.0778
## 4 ae BB EC -0.585 -0.0238 0.0236
## 5 af BB EC 0 0 -0.0152
## 6 ag BB EC -1.87 -0.242 -0.209
## 7 ah BB EC -0.585 -0.0238 0.0236
## 8 ai BB EC 1.13 0.148 0.147
## 9 aj BB EC 0 0 -0.0130
## 10 ak BB EC 0 0 -0.00595
## # ... with 1,430 more rows, and 1 more variable: wilcox_p_value <dbl>
# Matrix of heat trees for the CL-free comparison; same color scheme as the
# full-data matrix above (log2 ratio of medians, clipped to [-3, 3]).
heat_tree_matrix(objF,
dataset = "diff_table",
node_size = n_obs,
node_label = taxon_names,
node_color = log2_median_ratio,
node_color_range = diverging_palette(),
node_color_trans = "linear",
node_color_interval = c(-3, 3),
edge_color_interval = c(-3, 3),
node_size_axis_label = "Number of OTUs",
node_color_axis_label = "Log2 ratio median proportions")
communityB <- community
communityB[communityB>1] <- 1 # binary
rownames(communityB) # 61 rows = sites
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14"
## [15] "15" "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28"
## [29] "29" "30" "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42"
## [43] "43" "44" "45" "46" "47" "48" "49" "50" "51" "52" "53" "54" "55" "56"
## [57] "57" "58" "59" "60" "61"
#beanplot(specnumber(communityB)~habitatN$Habitat, col = c("grey", "white"))
# Bean plot of per-site species (OTU) richness by habitat type
beanplot(specnumber(communityB)~habitatN$Habitat, col = c("grey", "white"), xlab = "Habitat type", ylab = "Species richness")
# Split the binary community table into one data frame per habitat.
# Note: rows of communityB are assumed to be in the same order as habitatN.
BB <- communityB %>% dplyr::filter(habitatN$Habitat %in% c("BB"))
CL <- communityB %>% dplyr::filter(habitatN$Habitat %in% c("CL"))
EC <- communityB %>% dplyr::filter(habitatN$Habitat %in% c("EC"))
JC <- communityB %>% dplyr::filter(habitatN$Habitat %in% c("JC"))
MF <- communityB %>% dplyr::filter(habitatN$Habitat %in% c("MF"))
NF <- communityB %>% dplyr::filter(habitatN$Habitat %in% c("NF"))
# Observed richness per site (row sums of presence/absence = number of OTUs present)
BB_obs <- rowSums(BB)
CL_obs <- rowSums(CL)
EC_obs <- rowSums(EC)
JC_obs <- rowSums(JC)
MF_obs <- rowSums(MF)
NF_obs <- rowSums(NF)
# Welch t-test on per-site observed richness: BB vs CL
t.test(BB_obs, CL_obs)
##
## Welch Two Sample t-test
##
## data: BB_obs and CL_obs
## t = -2.4614, df = 17.68, p-value = 0.02437
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -15.473257 -1.212457
## sample estimates:
## mean of x mean of y
## 18.85714 27.20000
# Welch t-test on per-site observed richness: BB vs EC
t.test(BB_obs, EC_obs)
##
## Welch Two Sample t-test
##
## data: BB_obs and EC_obs
## t = 1.2365, df = 11.86, p-value = 0.2402
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.276104 11.847533
## sample estimates:
## mean of x mean of y
## 18.85714 14.57143
# Welch t-test on per-site observed richness: BB vs JC
t.test(BB_obs, JC_obs)
##
## Welch Two Sample t-test
##
## data: BB_obs and JC_obs
## t = 1.7179, df = 14.006, p-value = 0.1078
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.355966 12.270252
## sample estimates:
## mean of x mean of y
## 18.85714 13.40000
# Welch t-test on per-site observed richness: BB vs MF
t.test(BB_obs, MF_obs)
##
## Welch Two Sample t-test
##
## data: BB_obs and MF_obs
## t = 0.28826, df = 13.88, p-value = 0.7774
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.241872 8.178380
## sample estimates:
## mean of x mean of y
## 18.85714 17.88889
# Welch t-test on per-site observed richness: BB vs NF
t.test(BB_obs, NF_obs)
##
## Welch Two Sample t-test
##
## data: BB_obs and NF_obs
## t = -1.3026, df = 17.574, p-value = 0.2095
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -15.464041 3.639866
## sample estimates:
## mean of x mean of y
## 18.85714 24.76923
# Welch t-test on per-site observed richness: CL vs EC
t.test(CL_obs, EC_obs)
##
## Welch Two Sample t-test
##
## data: CL_obs and EC_obs
## t = 3.5305, df = 16.24, p-value = 0.002725
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 5.054716 20.202427
## sample estimates:
## mean of x mean of y
## 27.20000 14.57143
# Welch t-test on per-site observed richness: CL vs JC
t.test(CL_obs, JC_obs)
##
## Welch Two Sample t-test
##
## data: CL_obs and JC_obs
## t = 4.1851, df = 22.811, p-value = 0.0003602
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 6.975625 20.624375
## sample estimates:
## mean of x mean of y
## 27.2 13.4
# Welch t-test on per-site observed richness: CL vs MF
t.test(CL_obs, MF_obs)
##
## Welch Two Sample t-test
##
## data: CL_obs and MF_obs
## t = 2.6807, df = 20.549, p-value = 0.01416
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.078197 16.544025
## sample estimates:
## mean of x mean of y
## 27.20000 17.88889
# Welch t-test on per-site observed richness: CL vs NF
t.test(CL_obs, NF_obs)
##
## Welch Two Sample t-test
##
## data: CL_obs and NF_obs
## t = 0.52569, df = 20.725, p-value = 0.6047
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.193017 12.054556
## sample estimates:
## mean of x mean of y
## 27.20000 24.76923
# Welch t-test on per-site observed richness: EC vs JC
t.test(EC_obs, JC_obs)
##
## Welch Two Sample t-test
##
## data: EC_obs and JC_obs
## t = 0.34698, df = 13.14, p-value = 0.7341
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.114172 8.457030
## sample estimates:
## mean of x mean of y
## 14.57143 13.40000
# Welch t-test on per-site observed richness: EC vs MF
t.test(EC_obs, MF_obs)
##
## Welch Two Sample t-test
##
## data: EC_obs and MF_obs
## t = -0.935, df = 13.446, p-value = 0.3663
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -10.95686 4.32194
## sample estimates:
## mean of x mean of y
## 14.57143 17.88889
# Welch t-test on per-site observed richness: EC vs NF
t.test(EC_obs, NF_obs)
##
## Welch Two Sample t-test
##
## data: EC_obs and NF_obs
## t = -2.1789, df = 17.931, p-value = 0.04293
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -20.0335211 -0.3620833
## sample estimates:
## mean of x mean of y
## 14.57143 24.76923
# Welch t-test on per-site observed richness: JC vs MF
t.test(JC_obs, MF_obs)
##
## Welch Two Sample t-test
##
## data: JC_obs and MF_obs
## t = -1.3744, df = 16.518, p-value = 0.1877
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -11.394902 2.417124
## sample estimates:
## mean of x mean of y
## 13.40000 17.88889
# Welch t-test on per-site observed richness: JC vs NF
t.test(JC_obs, NF_obs)
##
## Welch Two Sample t-test
##
## data: JC_obs and NF_obs
## t = -2.5433, df = 18.265, p-value = 0.02023
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -20.751158 -1.987304
## sample estimates:
## mean of x mean of y
## 13.40000 24.76923
# Welch t-test on per-site observed richness: MF vs NF
t.test(MF_obs, NF_obs)
##
## Welch Two Sample t-test
##
## data: MF_obs and NF_obs
## t = -1.4952, df = 18.868, p-value = 0.1514
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -16.516129 2.755445
## sample estimates:
## mean of x mean of y
## 17.88889 24.76923
# p-values transcribed from the 15 Welch t-tests above, in order:
# BB_CL, BB_EC, BB_JC, BB_MF, BB_NF, CL_EC, CL_JC, CL_MF, CL_NF,
# EC_JC, EC_MF, EC_NF, JC_MF, JC_NF, MF_NF (checked against the printed output)
p_values <- c(0.0244, 0.2402, 0.1078, 0.7774, 0.2095, 0.0027, 0.0004, 0.0142, 0.6047, 0.7341, 0.3663, 0.0429, 0.1877, 0.0202, 0.1514)
# Benjamini-Hochberg (FDR) correction for the 15 pairwise comparisons
p_values.corr.fdr<-p.adjust(p_values, method = "fdr", n = length(p_values))
p_values.corr.fdr
## [1] 0.0732000 0.3275455 0.2310000 0.7774000 0.3142500 0.0202500 0.0060000
## [8] 0.0710000 0.6977308 0.7774000 0.4578750 0.1072500 0.3128333 0.0732000
## [15] 0.2838750
# 0.0732000 0.3275455 0.2310000 0.7774000 0.3142500 0.0202500 0.0060000 0.0710000
# 0.6977308 0.7774000 0.4578750 0.1072500 0.3128333 0.0732000 0.2838750
######## otu table with original reads number
# NOTE(review): the header comment above looks stale — communityB is the
# presence/absence table built earlier, not the raw-read table. specpool()
# only needs incidences, so the estimates are valid either way; confirm intent.
# Extrapolated species richness per habitat (Chao2, jackknife 1/2, bootstrap).
(pool1 <- specpool(communityB, habitatN$Habitat))
## Species chao chao.se jack1 jack1.se jack2 boot
## BB 83 185.9796 39.89775 132.71429 20.90210 165.8095 104.1117
## CL 194 336.1658 37.44452 295.73333 32.32777 358.8143 238.0445
## EC 64 115.4592 22.33722 99.14286 16.06619 120.0952 79.3403
## JC 85 209.6154 48.52236 139.00000 23.08246 177.7556 107.5053
## MF 119 405.5079 98.01090 203.44444 33.92075 267.8056 153.4993
## NF 210 507.3394 72.71267 346.61538 48.44615 445.4744 266.7256
## boot.se n
## BB 8.989364 7
## CL 15.679575 15
## EC 7.646678 7
## JC 10.816630 10
## MF 14.358124 9
## NF 22.180423 13
## Species chao chao.se jack1 jack1.se jack2 boot boot.se n BB 83 185.9796 39.89775 132.71429 20.90210 165.8095 104.1117 8.989364 7 CL 194 336.1658 37.44452 295.73333 32.32777 358.8143 238.0445 15.679575 15 EC 64 115.4592 22.33722 99.14286 16.06619 120.0952 79.3403 7.646678 7 JC 85 209.6154 48.52236 139.00000 23.08246 177.7556 107.5053 10.816630 10 MF 119 405.5079 98.01090 203.44444 33.92075 267.8056 153.4993 14.358124 9 NF 210 507.3394 72.71267 346.61538 48.44615 445.4744 266.7256 22.180423 13
# Chao2 estimates and their SEs, reordered to BB, EC, JC, MF, CL, NF so they
# line up with the names.arg vector of the barplot below (values from pool1)
es_value <- c(185.9796, 115.4592, 209.6154, 405.5079, 336.1658, 507.3394)
se <- c(39.89775, 22.33722, 48.52236, 98.01090, 37.44452, 72.71267)
# Draw vertical error bars on an existing plot (e.g. at barplot midpoints).
#   x, y   : coordinates of the bar centres
#   upper  : half-length of the bar above each point
#   lower  : half-length below each point (defaults to `upper`, i.e. symmetric)
#   length : cap width passed to arrows(); ... forwarded to arrows()
# Stops with an error if the input vectors differ in length.
error.bar <- function(x, y, upper, lower=upper, length=0.1,...){
  # Scalar condition: use short-circuiting || rather than vectorized |
  # (each length() comparison is length-1, so || is the correct operator here)
  if (length(x) != length(y) || length(y) != length(lower) || length(lower) != length(upper))
    stop("vectors must be same length")
  # angle=90 gives flat caps; code=3 draws a cap at both ends of each bar
  arrows(x, y+upper, x, y-lower, angle=90, code=3, length=length, ...)
}
# Bar plot of Chao2 richness estimates with +/- 1 SE error bars;
# barplot() returns the bar midpoints, which error.bar() needs as x positions
par(mfrow=c(1,2))
bar_es <- barplot(es_value, names.arg = c("BB", "EC", "JC", "MF", "CL", "NF"), col = "lightblue", ylim = c(0, 600), border = NA, ylab = "Species richness estimates")
error.bar(bar_es,es_value, se)
par(mfrow=c(1,1))
############ This function (t.test2) will calculate Welch's test
# Two-sample t-test computed from summary statistics only.
#   m1, m2 : group means
#   s1, s2 : group standard deviations
#   n1, n2 : group sample sizes
#   m0     : hypothesised difference in means (default 0)
#   equal.variance : FALSE (default) -> Welch's unequal-variance test;
#                    TRUE -> classic pooled-variance t-test
# Returns a named numeric vector: difference of means, standard error, t,
# df (rounded to 1 decimal for display), and the two-sided p-value.
t.test2 <- function(m1, m2, s1, s2, n1, n2, m0=0, equal.variance=FALSE)
{
  if (!equal.variance) {
    # Welch: unpooled standard error of the difference
    se <- sqrt(s1^2/n1 + s2^2/n2)
    # Welch-Satterthwaite approximation of the degrees of freedom
    df <- (s1^2/n1 + s2^2/n2)^2 / ((s1^2/n1)^2/(n1-1) + (s2^2/n2)^2/(n2-1))
  } else {
    # pooled standard deviation, scaled by the sample sizes
    se <- sqrt((1/n1 + 1/n2) * ((n1-1)*s1^2 + (n2-1)*s2^2)/(n1+n2-2))
    df <- n1 + n2 - 2
  }
  t <- (m1 - m2 - m0)/se
  # The p-value is computed with the unrounded df; only the reported df is rounded
  dat <- c(m1-m2, se, t, round(df, 1), 2*pt(-abs(t), df))
  names(dat) <- c("Difference of means", "Std Error", "t", "df", "p-value")
  dat
}
# Column 9 of the specpool table is n (number of sites per habitat); BB has 7
pool1[1, 9]
## [1] 7
# Welch t-test on Chao estimates, BB vs CL
# (mean = chao, sd recovered as chao.se * sqrt(n), n = number of sites)
t.test2(pool1[1, 2], pool1[2, 2], pool1[1, 3]*sqrt(pool1[1, 9]), pool1[2, 3]*sqrt(pool1[2, 9]), pool1[1, 9], pool1[2, 9])
## Difference of means Std Error t
## -150.18622013 54.71674629 -2.74479442
## df p-value
## 15.90000000 0.01443101
# Welch t-test on Chao estimates, BB vs EC (sd = chao.se * sqrt(n))
t.test2(pool1[1, 2], pool1[3, 2], pool1[1, 3]*sqrt(pool1[1, 9]), pool1[3, 3]*sqrt(pool1[3, 9]), pool1[1, 9], pool1[3, 9])
## Difference of means Std Error t
## 70.5204082 45.7250681 1.5422702
## df p-value
## 9.4000000 0.1558858
# Welch t-test on Chao estimates, BB vs JC (sd = chao.se * sqrt(n))
t.test2(pool1[1, 2], pool1[4, 2], pool1[1, 3]*sqrt(pool1[1, 9]), pool1[4, 3]*sqrt(pool1[4, 9]), pool1[1, 9], pool1[4, 9])
## Difference of means Std Error t
## -23.6357928 62.8191847 -0.3762512
## df p-value
## 15.0000000 0.7119989
# Welch t-test on Chao estimates, BB vs MF (sd = chao.se * sqrt(n))
t.test2(pool1[1, 2], pool1[5, 2], pool1[1, 3]*sqrt(pool1[1, 9]), pool1[5, 3]*sqrt(pool1[5, 9]), pool1[1, 9], pool1[5, 9])
## Difference of means Std Error t
## -219.52834467 105.82044772 -2.07453615
## df p-value
## 10.50000000 0.06350765
# Welch t-test on Chao estimates, BB vs NF (sd = chao.se * sqrt(n))
t.test2(pool1[1, 2], pool1[6, 2], pool1[1, 3]*sqrt(pool1[1, 9]), pool1[6, 3]*sqrt(pool1[6, 9]), pool1[1, 9], pool1[6, 9])
## Difference of means Std Error t
## -321.35977468 82.93951227 -3.87462822
## df p-value
## 17.20000000 0.00119455
# Welch t-test on Chao estimates, CL vs EC (sd = chao.se * sqrt(n))
t.test2(pool1[2, 2], pool1[3, 2], pool1[2, 3]*sqrt(pool1[2, 9]), pool1[3, 3]*sqrt(pool1[3, 9]), pool1[2, 9], pool1[3, 9])
## Difference of means Std Error t
## 2.207066e+02 4.360096e+01 5.061967e+00
## df p-value
## 1.990000e+01 6.078428e-05
# Welch t-test on Chao estimates, CL vs JC (sd = chao.se * sqrt(n))
t.test2(pool1[2, 2], pool1[4, 2], pool1[2, 3]*sqrt(pool1[2, 9]), pool1[4, 3]*sqrt(pool1[4, 9]), pool1[2, 9], pool1[4, 9])
## Difference of means Std Error t
## 126.55042735 61.29038815 2.06476792
## df p-value
## 18.70000000 0.05312766
# Welch t-test on Chao estimates, CL vs MF (sd = chao.se * sqrt(n))
t.test2(pool1[2, 2], pool1[5, 2], pool1[2, 3]*sqrt(pool1[2, 9]), pool1[5, 3]*sqrt(pool1[5, 9]), pool1[2, 9], pool1[5, 9])
## Difference of means Std Error t
## -69.3421245 104.9201070 -0.6609041
## df p-value
## 10.4000000 0.5230721
# Welch t-test on Chao estimates, CL vs NF (sd = chao.se * sqrt(n))
t.test2(pool1[2, 2], pool1[6, 2], pool1[2, 3]*sqrt(pool1[2, 9]), pool1[6, 3]*sqrt(pool1[6, 9]), pool1[2, 9], pool1[6, 9])
## Difference of means Std Error t
## -171.17355455 81.78767880 -2.09290149
## df p-value
## 18.10000000 0.05069616
# Welch t-test on Chao estimates, EC vs JC (sd = chao.se * sqrt(n))
t.test2(pool1[3, 2], pool1[4, 2], pool1[3, 3]*sqrt(pool1[3, 9]), pool1[4, 3]*sqrt(pool1[4, 9]), pool1[3, 9], pool1[4, 9])
## Difference of means Std Error t
## -94.1562009 53.4169562 -1.7626650
## df p-value
## 12.4000000 0.1025966
# Welch t-test on Chao estimates, EC vs MF (sd = chao.se * sqrt(n))
t.test2(pool1[3, 2], pool1[5, 2], pool1[3, 3]*sqrt(pool1[3, 9]), pool1[5, 3]*sqrt(pool1[5, 9]), pool1[3, 9], pool1[5, 9])
## Difference of means Std Error t
## -290.0487528 100.5240687 -2.8853662
## df p-value
## 8.8000000 0.0183903
# Welch t-test on Chao estimates, EC vs NF (sd = chao.se * sqrt(n))
t.test2(pool1[3, 2], pool1[6, 2], pool1[3, 3]*sqrt(pool1[3, 9]), pool1[6, 3]*sqrt(pool1[6, 9]), pool1[3, 9], pool1[6, 9])
## Difference of means Std Error t
## -3.918802e+02 7.606631e+01 -5.151823e+00
## df p-value
## 1.410000e+01 1.430669e-04
# Welch t-test on Chao estimates, JC vs MF (sd = chao.se * sqrt(n))
t.test2(pool1[4, 2], pool1[5, 2], pool1[4, 3]*sqrt(pool1[4, 9]), pool1[5, 3]*sqrt(pool1[5, 9]), pool1[4, 9], pool1[5, 9])
## Difference of means Std Error t
## -195.89255189 109.36432924 -1.79119237
## df p-value
## 11.80000000 0.09898252
# Welch t-test on Chao estimates, JC vs NF (sd = chao.se * sqrt(n))
t.test2(pool1[4, 2], pool1[6, 2], pool1[4, 3]*sqrt(pool1[4, 9]), pool1[6, 3]*sqrt(pool1[6, 9]), pool1[4, 9], pool1[6, 9])
## Difference of means Std Error t
## -2.977240e+02 8.741597e+01 -3.405831e+00
## df p-value
## 1.980000e+01 2.830142e-03
# Welch t-test on Chao estimates, MF vs NF (sd = chao.se * sqrt(n))
t.test2(pool1[5, 2], pool1[6, 2], pool1[5, 3]*sqrt(pool1[5, 9]), pool1[6, 3]*sqrt(pool1[6, 9]), pool1[5, 9], pool1[6, 9])
## Difference of means Std Error t
## -101.8314300 122.0379828 -0.8344241
## df p-value
## 16.0000000 0.4163281
# BB_CL, BB_EC, BB_JC, BB_MF, BB_NF, CL_EC, CL_JC, CL_MF, CL_NF, EC_JC, EC_MF, EC_NF, JC_MF, JC_NF, MF_NF
# BUG FIX(review): the hand-typed p-value vector previously used here
# (0.0144, 0.1530, 0.7236, 0.0613, ...) disagreed with several of the t.test2()
# results printed above (e.g. BB_EC printed 0.1559 but 0.1530 was typed; BB_JC
# printed 0.7120 but 0.7236 was typed). Compute the vector directly from pool1
# instead so transcription errors cannot occur. combn(n, 2) enumerates the habitat
# pairs in exactly the order listed in the comment above (rows of pool1 are
# alphabetical: BB, CL, EC, JC, MF, NF).
p_values <- apply(combn(nrow(pool1), 2), 2, function(ij) {
  i <- ij[1]; j <- ij[2]
  t.test2(pool1[i, 2], pool1[j, 2],
          pool1[i, 3]*sqrt(pool1[i, 9]), pool1[j, 3]*sqrt(pool1[j, 9]),
          pool1[i, 9], pool1[j, 9])[["p-value"]]
})
p_values.corr.fdr<-p.adjust(p_values, method = "fdr", n = length(p_values))
p_values.corr.fdr
# NOTE(review): the recorded output below was produced from the old hand-typed
# vector and is therefore stale; rerun to refresh.
## [1] 0.0432000 0.1912500 0.7236000 0.1021667 0.0060000 0.0007500 0.0973125
## [8] 0.5306786 0.0973125 0.1419545 0.0455000 0.0007500 0.1399500 0.0105000
## [15] 0.5241923
# 0.0432000 0.1912500 0.7236000 0.1021667 0.0060000 0.0007500 0.0973125 0.5306786
# 0.0973125 0.1419545 0.0455000 0.0007500 0.1399500 0.0105000 0.5241923
# http://chao.stat.nthu.edu.tw/wordpress/wp-content/uploads/software/iNEXT_UserGuide.pdf
cname <- c("BB","CL","EC","JC","MF","NF")
# Per-habitat incidence frequencies: for each OTU, the number of sites in that
# habitat where it occurs (column sums of the per-habitat presence/absence tables)
comm4inext_abun <- matrix(c(colSums(BB), colSums(CL), colSums(EC), colSums(JC), colSums(MF), colSums(NF)), ncol = 6)
colnames(comm4inext_abun) <- cname
colnameBB <- colnames(BB)
rownames(comm4inext_abun) <- colnameBB
# iNEXT's "incidence_freq" format requires the FIRST row to be the number of
# sampling units (sites) per habitat, followed by the incidence frequencies
comm4inext <- rbind(c(nrow(BB), nrow(CL), nrow(EC), nrow(JC), nrow(MF), nrow(NF)), comm4inext_abun) #
#comm4inext
confnum=0.95 # set confidence here
outcomm0 <- iNEXT(comm4inext, q=0, conf=confnum, datatype="incidence_freq")
# Hill numbers (q): 0 = sp richness, 1 = Shannon, 2 = inverse Simpson
outcomm0$DataInfo
## site T U S.obs SC Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10
## 1 BB 7 132 83 0.5933 58 14 3 3 5 0 0 0 0 0
## 2 CL 15 408 194 0.7458 109 39 19 9 1 11 2 0 2 0
## 3 EC 7 102 64 0.6391 41 14 3 6 0 0 0 0 0 0
## 4 JC 10 134 85 0.5728 60 13 6 3 1 1 1 0 0 0
## 5 MF 9 161 119 0.4309 95 14 4 5 0 1 0 0 0 0
## 6 NF 13 322 210 0.5573 148 34 15 8 3 0 2 0 0 0
# Chao2 richness estimates from the incidence-frequency table (cross-check vs specpool)
ChaoRichness(comm4inext, datatype="incidence_freq") # same as specpool results, so i trust that we have done this correctly
## Observed Estimator Est_s.e. 95% Lower 95% Upper
## BB 83 185.980 39.898 132.480 297.326
## CL 194 336.166 37.445 279.575 430.179
## EC 64 115.459 22.337 86.793 180.178
## JC 85 209.615 48.522 144.670 345.246
## MF 119 405.508 98.011 268.269 668.926
## NF 210 507.339 72.713 395.401 686.863
# Bias-corrected Shannon diversity estimates from the incidence frequencies
ChaoShannon(comm4inext, datatype="incidence_freq")
## Observed Estimator Est_s.e 95% Lower 95% Upper
## BB 4.230 4.899 0.134 4.637 5.162
## CL 4.974 5.397 0.065 5.270 5.525
## EC 4.012 4.577 0.143 4.297 4.856
## JC 4.250 4.987 0.154 4.684 5.290
## MF 4.640 5.702 0.175 5.359 6.046
## NF 5.177 5.938 0.098 5.746 6.130
# Rarefaction/extrapolation for all three Hill orders at once (q = 0, 1, 2)
outI <- iNEXT(comm4inext, q=c(0,1,2), conf=confnum, datatype="incidence_freq")
# Sample‐size‐based R/E curves, separating by "site"
ggiNEXT(outI, type=1, facet.var="site") +theme_bw(base_size = 18)
# Sample‐size‐based R/E curves, separating by "order"
ggiNEXT(outI, type=1, facet.var="order")+theme_bw(base_size = 18)
# Phylogenetic-diversity analysis: work on a copy of the binary table
commPD <- communityB
#remove 3 OTUs with very long branches
commPD$Cluster418132 <- NULL
commPD$Cluster636975 <- NULL
commPD$Cluster549885 <- NULL
# NOTE(review): BB, CL, EC, JC, MF, NF are re-assigned here, overwriting the
# earlier per-habitat subsets of communityB with subsets of commPD
BB <- commPD %>% dplyr::filter(habitatN$Habitat %in% c("BB"))
CL <- commPD %>% dplyr::filter(habitatN$Habitat %in% c("CL"))
EC <- commPD %>% dplyr::filter(habitatN$Habitat %in% c("EC"))
JC <- commPD %>% dplyr::filter(habitatN$Habitat %in% c("JC"))
MF <- commPD %>% dplyr::filter(habitatN$Habitat %in% c("MF"))
NF <- commPD %>% dplyr::filter(habitatN$Habitat %in% c("NF"))
# Incidence frequencies per habitat, OTUs as rows
comm4inextPD <- matrix(c(colSums(BB), colSums(CL), colSums(EC), colSums(JC), colSums(MF), colSums(NF)), ncol = 6)
colnames(comm4inextPD) <- cname
colnameBB <- colnames(BB)
rownames(comm4inextPD) <- colnameBB
# Read the ML tree (Newick string in the first column of the file) and convert
# it to ade4's phylog format for iNextPD / table.phylog
MLtree.tre <- read.table("./data/2014MBC_535otu-2collembola_align533.newick")
ML.tre <- ade4::newick2phylog(MLtree.tre$V1)
ML.lab <- rownames(comm4inextPD)
#rownames(comm4inext_abun)
# Recover the site IDs per habitat so the subset tables get informative rownames
BBnames <- habitatN %>% dplyr::filter(habitatN$Habitat %in% c("BB"))
CLnames <- habitatN %>% dplyr::filter(habitatN$Habitat %in% c("CL"))
ECnames <- habitatN %>% dplyr::filter(habitatN$Habitat %in% c("EC"))
JCnames <- habitatN %>% dplyr::filter(habitatN$Habitat %in% c("JC"))
MFnames <- habitatN %>% dplyr::filter(habitatN$Habitat %in% c("MF"))
NFnames <- habitatN %>% dplyr::filter(habitatN$Habitat %in% c("NF"))
rownames(BB) <- BBnames$Site
rownames(CL) <- CLnames$Site
rownames(EC) <- ECnames$Site
rownames(JC) <- JCnames$Site
rownames(MF) <- MFnames$Site
rownames(NF) <- NFnames$Site
BBnames$Site
## [1] "BB04" "BB05" "BB06" "BB07" "BB08" "BB09" "BB10"
#commB <- list(BB.Site = t(BB), CL.Site = t(CL), EC.Site = t(EC), JC.Site = t(JC), MF.Site = t(MF), NF.Site = t(NF))
# iNextPD wants incidence_raw: a list of species-by-site 0/1 matrices, one per habitat
commB <- list(BB = t(BB), CL = t(CL), EC = t(EC), JC = t(JC), MF = t(MF), NF = t(NF))
# Phylogenetic diversity rarefaction/extrapolation up to 30 sampling units
out <- iNextPD(commB, ML.lab, ML.tre, q=c(0, 1, 2), datatype="incidence_raw", endpoint = 30, se = TRUE)
# Sample‐size‐based R/E curves, separating by "site""
ggiNEXT(out, type=1, facet.var="site") +theme_bw(base_size = 18)
# Sample‐size‐based R/E curves, separating by "order"
ggiNEXT(out, type=1, facet.var="order")+theme_bw(base_size = 18)
# display black‐white theme
ggiNEXT(out, type=1, facet.var="order", grey=TRUE)
# Plot the incidence-frequency table against the phylogeny
table.phylog(comm4inextPD, ML.tre, csize=2, f.phylog=0.7)
# 2-D NMDS on binary Jaccard distances (binary=TRUE re-binarizes defensively)
community.jmds <- metaMDS(communityB, distance = "jaccard", trymax = 40, binary=TRUE)
## Run 0 stress 0.1937798
## Run 1 stress 0.1993032
## Run 2 stress 0.2046784
## Run 3 stress 0.2039088
## Run 4 stress 0.1938151
## ... Procrustes: rmse 0.04310288 max resid 0.1929209
## Run 5 stress 0.1941205
## ... Procrustes: rmse 0.05150645 max resid 0.2706739
## Run 6 stress 0.1951905
## Run 7 stress 0.2050531
## Run 8 stress 0.2088771
## Run 9 stress 0.199114
## Run 10 stress 0.1928347
## ... New best solution
## ... Procrustes: rmse 0.03670751 max resid 0.1093344
## Run 11 stress 0.1966296
## Run 12 stress 0.1918593
## ... New best solution
## ... Procrustes: rmse 0.03193756 max resid 0.1400837
## Run 13 stress 0.1965023
## Run 14 stress 0.1928589
## Run 15 stress 0.1937054
## Run 16 stress 0.203305
## Run 17 stress 0.1979156
## Run 18 stress 0.1927809
## Run 19 stress 0.1935329
## Run 20 stress 0.1911866
## ... New best solution
## ... Procrustes: rmse 0.02706889 max resid 0.1126668
## Run 21 stress 0.19505
## Run 22 stress 0.1932218
## Run 23 stress 0.1932825
## Run 24 stress 0.2052633
## Run 25 stress 0.1980282
## Run 26 stress 0.1931746
## Run 27 stress 0.1981246
## Run 28 stress 0.1987869
## Run 29 stress 0.2031158
## Run 30 stress 0.1959541
## Run 31 stress 0.1921867
## Run 32 stress 0.198956
## Run 33 stress 0.1920951
## Run 34 stress 0.2008074
## Run 35 stress 0.19739
## Run 36 stress 0.2005727
## Run 37 stress 0.1995965
## Run 38 stress 0.2034736
## Run 39 stress 0.2009664
## Run 40 stress 0.2056193
## *** No convergence -- monoMDS stopping criteria:
## 1: no. of iterations >= maxit
## 39: stress ratio > sratmax
# Restart the search from the best solution found so far
community.jmds <- metaMDS(communityB, distance = "jaccard", binary = TRUE, previous.best = community.jmds) # doesn't converge well, with final stress > 0.20
## Starting from 2-dimensional configuration
## Run 0 stress 0.1911866
## Run 1 stress 0.1913813
## ... Procrustes: rmse 0.02121749 max resid 0.08645838
## Run 2 stress 0.1926701
## Run 3 stress 0.1962628
## Run 4 stress 0.1914205
## ... Procrustes: rmse 0.009650509 max resid 0.06445552
## Run 5 stress 0.1927342
## Run 6 stress 0.1944217
## Run 7 stress 0.2033203
## Run 8 stress 0.2119161
## Run 9 stress 0.1914252
## ... Procrustes: rmse 0.009888332 max resid 0.06567271
## Run 10 stress 0.1999456
## Run 11 stress 0.1918427
## Run 12 stress 0.2065167
## Run 13 stress 0.1921984
## Run 14 stress 0.1920911
## Run 15 stress 0.1916466
## ... Procrustes: rmse 0.01553547 max resid 0.06001273
## Run 16 stress 0.2049019
## Run 17 stress 0.1928554
## Run 18 stress 0.1965603
## Run 19 stress 0.1996184
## Run 20 stress 0.2064503
## *** No convergence -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
# Shepard plot: goodness of fit of the 2-D NMDS configuration
stressplot(community.jmds)
# Community table heatmap with sites ordered along the first NMDS axis
tabasco(community, use = community.jmds, labCol = habitatN$Habitat, col = brewer.pal(3, "Oranges"))
# Attach site metadata, then drop the metadata columns again so only Site +
# species columns remain for betapart (Site becomes the rownames below)
communityBbetapart <- bind_cols(habitatN, communityB)
communityBbetapart <- communityBbetapart %>% dplyr::select(-c(Habitat:sprichness))
# Pairwise habitat-vs-NF subsets for multi-site beta diversity
JCNF <- communityBbetapart %>% dplyr::filter(habitatN$Habitat %in% c("JC", "NF")) %>% column_to_rownames(var="Site")
BBNF <- communityBbetapart %>% dplyr::filter(habitatN$Habitat %in% c("BB", "NF")) %>% column_to_rownames(var="Site")
CLNF <- communityBbetapart %>% dplyr::filter(habitatN$Habitat %in% c("CL", "NF")) %>% column_to_rownames(var="Site")
ECNF <- communityBbetapart %>% dplyr::filter(habitatN$Habitat %in% c("EC", "NF")) %>% column_to_rownames(var="Site")
MFNF <- communityBbetapart %>% dplyr::filter(habitatN$Habitat %in% c("MF", "NF")) %>% column_to_rownames(var="Site")
# Multi-site Jaccard beta diversity partitioned into turnover + nestedness
JCNF.multi.dist <- beta.multi(JCNF, index.family="jac")
BBNF.multi.dist <- beta.multi(BBNF, index.family="jac")
CLNF.multi.dist <- beta.multi(CLNF, index.family="jac")
ECNF.multi.dist <- beta.multi(ECNF, index.family="jac")
MFNF.multi.dist <- beta.multi(MFNF, index.family="jac")
multi.all <- list(JCNF = JCNF.multi.dist, BBNF = BBNF.multi.dist, CLNF = CLNF.multi.dist, ECNF = ECNF.multi.dist, MFNF = MFNF.multi.dist)
# Pairwise (site x site) distance matrices: beta.jtu (turnover), beta.jne
# (nestedness), beta.jac (total Jaccard)
ALL.dist <- communityBbetapart %>% column_to_rownames(var="Site") %>% beta.pair(index.family="jac")
# NMDS on the nestedness component
ALL.dist.subset <- ALL.dist[["beta.jne"]]
ALL.dist.jne.jmds <- metaMDS(ALL.dist.subset)
## Run 0 stress 0.2878738
## Run 1 stress 0.3102284
## Run 2 stress 0.302938
## Run 3 stress 0.2986731
## Run 4 stress 0.2978615
## Run 5 stress 0.3063189
## Run 6 stress 0.3012631
## Run 7 stress 0.2994175
## Run 8 stress 0.2990311
## Run 9 stress 0.3016207
## Run 10 stress 0.301279
## Run 11 stress 0.3037666
## Run 12 stress 0.30188
## Run 13 stress 0.3012636
## Run 14 stress 0.298905
## Run 15 stress 0.3047443
## Run 16 stress 0.3023006
## Run 17 stress 0.2977708
## Run 18 stress 0.2984843
## Run 19 stress 0.3031377
## Run 20 stress 0.3051306
## *** No convergence -- monoMDS stopping criteria:
## 1: no. of iterations >= maxit
## 19: stress ratio > sratmax
# Restart the nestedness NMDS from its previous best solution
ALL.dist.jne.jmds <- metaMDS(ALL.dist.subset, previous.best = ALL.dist.jne.jmds)
## Starting from 2-dimensional configuration
## Run 0 stress 0.2878738
## Run 1 stress 0.2998056
## Run 2 stress 0.3002205
## Run 3 stress 0.3079505
## Run 4 stress 0.3024952
## Run 5 stress 0.3072661
## Run 6 stress 0.3016174
## Run 7 stress 0.2989858
## Run 8 stress 0.2990639
## Run 9 stress 0.2981019
## Run 10 stress 0.3020738
## Run 11 stress 0.3063326
## Run 12 stress 0.3064975
## Run 13 stress 0.3044563
## Run 14 stress 0.3052725
## Run 15 stress 0.2992804
## Run 16 stress 0.3039105
## Run 17 stress 0.3057486
## Run 18 stress 0.3074614
## Run 19 stress 0.3015778
## Run 20 stress 0.2995792
## *** No convergence -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
# Shepard plot for the nestedness NMDS
stressplot(ALL.dist.jne.jmds)
# Now the turnover component of beta diversity
ALL.dist.subset <- ALL.dist[["beta.jtu"]]
ALL.dist.jtu.jmds <- metaMDS(ALL.dist.subset)
## Run 0 stress 0.1964351
## Run 1 stress 0.195639
## ... New best solution
## ... Procrustes: rmse 0.04923861 max resid 0.1820483
## Run 2 stress 0.2031976
## Run 3 stress 0.1946288
## ... New best solution
## ... Procrustes: rmse 0.0267172 max resid 0.1613364
## Run 4 stress 0.2044537
## Run 5 stress 0.2035156
## Run 6 stress 0.1995786
## Run 7 stress 0.2016852
## Run 8 stress 0.2062343
## Run 9 stress 0.2025288
## Run 10 stress 0.1952773
## Run 11 stress 0.1983759
## Run 12 stress 0.198087
## Run 13 stress 0.1979433
## Run 14 stress 0.2045171
## Run 15 stress 0.195977
## Run 16 stress 0.2054783
## Run 17 stress 0.1980396
## Run 18 stress 0.2003249
## Run 19 stress 0.2061029
## Run 20 stress 0.1970336
## *** No convergence -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
# Restart the turnover NMDS from its own previous best solution.
# BUG FIX: this previously passed `previous.best = ALL.dist.jne.jmds` (the
# NESTEDNESS ordination) — a copy-paste slip from the jne block above. The
# recorded output even warned "Stress differs from 'previous.best': reset tries"
# and showed Run 0 stress 0.387 (the jne configuration scored on jtu distances).
ALL.dist.jtu.jmds <- metaMDS(ALL.dist.subset, previous.best = ALL.dist.jtu.jmds)
## Starting from 2-dimensional configuration
## Stress differs from 'previous.best': reset tries
## Run 0 stress 0.3873318
## Run 1 stress 0.1943429
## ... New best solution
## ... Procrustes: rmse 0.1279321 max resid 0.1960386
## Run 2 stress 0.1945287
## ... Procrustes: rmse 0.01745824 max resid 0.05851616
## Run 3 stress 0.2149575
## Run 4 stress 0.2076494
## Run 5 stress 0.2010388
## Run 6 stress 0.1965874
## Run 7 stress 0.2082553
## Run 8 stress 0.1943545
## ... Procrustes: rmse 0.01666206 max resid 0.07883044
## Run 9 stress 0.2026224
## Run 10 stress 0.1947925
## ... Procrustes: rmse 0.01806205 max resid 0.05953726
## Run 11 stress 0.1990639
## Run 12 stress 0.1951838
## Run 13 stress 0.2033034
## Run 14 stress 0.1974786
## Run 15 stress 0.1951864
## Run 16 stress 0.2038948
## Run 17 stress 0.1959615
## Run 18 stress 0.2126981
## Run 19 stress 0.2073787
## Run 20 stress 0.1944445
## ... Procrustes: rmse 0.01415498 max resid 0.06369613
## *** No convergence -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
# Shepard plot for the turnover NMDS
stressplot(ALL.dist.jtu.jmds)
# , ylim = c(-0.5, 0.5), xlim = c(-0.4, 0.4)
# 2x2 panel: full, turnover-only, and nestedness-only ordinations
par(mfrow=c(2,2))
colvec <- brewer.pal(5, "Set1")
# Fit and plot a GAM surface of species richness over the full-beta-diversity NMDS.
# FIX(review): the trailing `ylim = c(-0.5, 0.5)` previously passed to this call
# went into with()'s `...` and was silently ignored (with() only evaluates its
# second argument), so it has been removed — no change in behavior. To actually
# set axis limits, pass ylim inside ordisurf() itself.
with(habitatN, ordisurf(community.jmds, sprichness, main="All beta diversity", cex=0.5, col = "white"))
##
## Family: gaussian
## Link function: identity
##
## Formula:
## y ~ s(x1, x2, k = 10, bs = "tp", fx = FALSE)
##
## Estimated degrees of freedom:
## 5.17 total = 6.17
##
## REML score: 221.1028
# plot(community.jmds, main = "All beta diversity", ylim = c(-0.4, 0.4))
# Overlay 95%-SE ellipse polygons per habitat on each ordination.
# NOTE(review): BB, EC and JC share colvec[3], and MF/NF share "darkblue",
# while CL alone gets colvec[1] — presumably intentional grouping of habitat
# types by colour, but verify; colvec has 5 colours of which only 2 are used.
with(habitatN, ordispider(community.jmds, Habitat, cex=.5, draw="polygon", col=as.character(colvec[3]), alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("BB"))))
with(habitatN, ordispider(community.jmds, Habitat, cex=.5, draw="polygon", col=as.character(colvec[1]), alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("CL"))))
with(habitatN, ordispider(community.jmds, Habitat, cex=.5, draw="polygon", col=as.character(colvec[3]), alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("EC"))))
with(habitatN, ordispider(community.jmds, Habitat, cex=.5, draw="polygon", col=as.character(colvec[3]), alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("JC"))))
with(habitatN, ordispider(community.jmds, Habitat, cex=.5, draw="polygon", col="darkblue", alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("MF"))))
with(habitatN, ordispider(community.jmds, Habitat, cex=.5, draw="polygon", col="darkblue", alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("NF"))))
# Panel 2: turnover component only
plot(ALL.dist.jtu.jmds, main = "Turnover beta diversity only", ylim = c(-0.5, 0.5))
with(habitatN, ordispider(ALL.dist.jtu.jmds, Habitat, cex=.5, draw="polygon", col=as.character(colvec[3]), alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("BB"))))
with(habitatN, ordispider(ALL.dist.jtu.jmds, Habitat, cex=.5, draw="polygon", col=as.character(colvec[1]), alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("CL"))))
with(habitatN, ordispider(ALL.dist.jtu.jmds, Habitat, cex=.5, draw="polygon", col=as.character(colvec[3]), alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("EC"))))
with(habitatN, ordispider(ALL.dist.jtu.jmds, Habitat, cex=.5, draw="polygon", col=as.character(colvec[3]), alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("JC"))))
with(habitatN, ordispider(ALL.dist.jtu.jmds, Habitat, cex=.5, draw="polygon", col="darkblue", alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("MF"))))
with(habitatN, ordispider(ALL.dist.jtu.jmds, Habitat, cex=.5, draw="polygon", col="darkblue", alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("NF"))))
# Panel 3: nestedness component only
plot(ALL.dist.jne.jmds, main = "Nestedness beta diversity only", ylim = c(-0.5, 0.5))
with(habitatN, ordispider(ALL.dist.jne.jmds, Habitat, cex=.5, draw="polygon", col=as.character(colvec[3]), alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("BB"))))
with(habitatN, ordispider(ALL.dist.jne.jmds, Habitat, cex=.5, draw="polygon", col=as.character(colvec[1]), alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("CL"))))
with(habitatN, ordispider(ALL.dist.jne.jmds, Habitat, cex=.5, draw="polygon", col=as.character(colvec[3]), alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("EC"))))
with(habitatN, ordispider(ALL.dist.jne.jmds, Habitat, cex=.5, draw="polygon", col=as.character(colvec[3]), alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("JC"))))
with(habitatN, ordispider(ALL.dist.jne.jmds, Habitat, cex=.5, draw="polygon", col="darkblue", alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("MF"))))
with(habitatN, ordispider(ALL.dist.jne.jmds, Habitat, cex=.5, draw="polygon", col="darkblue", alpha=20, kind="se", conf=0.95, label=TRUE, show.groups=(c("NF"))))
par(mfrow=c(1,1))
# IMPORTANT: the next step drops singleton OTUs (species recorded at only one site) before re-running the NMDS
# Keep only species occurring at more than one site (specnumber with MARGIN=2
# counts, per species, the number of sites where it is present)
communityB <- communityB[, which(specnumber(communityB, MARGIN=2) > 1)]
# 269 species remained
### do NMDS analysis to quickly see patterns ####
community.jmds <- metaMDS(communityB, distance = "jaccard", trymax = 40, binary=TRUE)
## Run 0 stress 0.1941036
## Run 1 stress 0.1898452
## ... New best solution
## ... Procrustes: rmse 0.05543086 max resid 0.2628178
## Run 2 stress 0.2028075
## Run 3 stress 0.1906199
## Run 4 stress 0.1901219
## ... Procrustes: rmse 0.03627274 max resid 0.1447797
## Run 5 stress 0.1893944
## ... New best solution
## ... Procrustes: rmse 0.02812124 max resid 0.09950703
## Run 6 stress 0.2035494
## Run 7 stress 0.1941584
## Run 8 stress 0.1920113
## Run 9 stress 0.1940623
## Run 10 stress 0.1945931
## Run 11 stress 0.1979348
## Run 12 stress 0.2039438
## Run 13 stress 0.1942227
## Run 14 stress 0.1962865
## Run 15 stress 0.2009856
## Run 16 stress 0.1988058
## Run 17 stress 0.2046388
## Run 18 stress 0.1898469
## ... Procrustes: rmse 0.04070436 max resid 0.1672788
## Run 19 stress 0.2101562
## Run 20 stress 0.2034526
## Run 21 stress 0.1904331
## Run 22 stress 0.1938473
## Run 23 stress 0.1997294
## Run 24 stress 0.1911799
## Run 25 stress 0.1951652
## Run 26 stress 0.2023139
## Run 27 stress 0.1927948
## Run 28 stress 0.1986546
## Run 29 stress 0.1941592
## Run 30 stress 0.1904473
## Run 31 stress 0.1983602
## Run 32 stress 0.1900627
## Run 33 stress 0.1923577
## Run 34 stress 0.2049473
## Run 35 stress 0.1899515
## Run 36 stress 0.1931936
## Run 37 stress 0.2088232
## Run 38 stress 0.2029848
## Run 39 stress 0.2034659
## Run 40 stress 0.2073307
## *** No convergence -- monoMDS stopping criteria:
## 40: stress ratio > sratmax
# Restart from the best configuration of the previous search
community.jmds <- metaMDS(communityB, distance = "jaccard", binary = TRUE, previous.best = community.jmds) # doesn't converge well, with final stress > 0.20
## Starting from 2-dimensional configuration
## Run 0 stress 0.1893944
## Run 1 stress 0.1903028
## Run 2 stress 0.1935929
## Run 3 stress 0.1943752
## Run 4 stress 0.1991359
## Run 5 stress 0.1914793
## Run 6 stress 0.1946816
## Run 7 stress 0.2050226
## Run 8 stress 0.194464
## Run 9 stress 0.1899189
## Run 10 stress 0.1894561
## ... Procrustes: rmse 0.02454399 max resid 0.08716317
## Run 11 stress 0.2007596
## Run 12 stress 0.2015332
## Run 13 stress 0.1934242
## Run 14 stress 0.1931752
## Run 15 stress 0.1913791
## Run 16 stress 0.1925354
## Run 17 stress 0.1947614
## Run 18 stress 0.1905258
## Run 19 stress 0.1906189
## Run 20 stress 0.199274
## *** No convergence -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
# Shepard plot for the NMDS on the singleton-filtered table
stressplot(community.jmds)
# Tested different error families (testing code is archived down below). Based on the residuals, decided on family = "binomial", which suits the presence/absence dataset. Using row.eff = "random" gives a composition-only analysis. Now rerun with many more iterations.
# set up MCMC parameters
# mcmc.control <- list(n.burnin = 300, n.iteration = 1000, n.thin = 30, seed = 123) # for debugging
mcmc.control4 <- list(n.burnin = 10000, n.iteration = 40000, n.thin = 30)
# Set up priors
# Francis Hui suggests trying different priors to stabilise sampling for sparse matrices (such as we have). This isn't as important as getting the mcmc.control parameters right
#set.prior <- list(type = c("normal","normal","normal","uniform"), hypparams = c(10, 10, 10, 30), ssvs.index = ssvsindex)
# type = c("normal","normal","normal","uniform"), hypparams = c(10, 10, 10, 30) # boral default
# type = c("cauchy","cauchy","cauchy","uniform"), hypparams = c(2.5^2, 2.5^2, 2.5^2, 30) # Gelman proposed this
# type = c("normal","normal","normal","uniform"), hypparams = c(1, 1, 1, 30) ## People who developed the STAN package proposed this as one possibility.
# set up model
# Latent-variable model: binomial family (presence/absence data), 2 latent
# variables, random row effects (composition-only); beep() signals completion
# since this fit takes a long time.
commB.fit.b3.4.none <- boral(communityB, family = "binomial", num.lv = 2, row.eff = "random", mcmc.control = mcmc.control4, save.model = TRUE); beep(sound = 1)
## row.ids assumed to be a matrix with one column and elements 1,2,...nrow(y) i.e., a row-specific intercept
## Warning in as.POSIXlt.POSIXct(x, tz): unknown timezone 'zone/tz/2018e.1.0/
## zoneinfo/Asia/Shanghai'
## module glm loaded
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 16287
## Unobserved stochastic nodes: 17271
## Total graph size: 133646
##
## Initializing model
# save output in case i want to do other analyses on it later
saveRDS(commB.fit.b3.4.none, "boral_commB.fit.b3.4.RDS")
# commB.fit.b3.4.none <- readRDS("boral_commB.fit.b3.4.RDS") # to restore
# Point estimates (posterior medians, per $est below) of species coefficients,
# LV scores per site, and random row effects
summary(commB.fit.b3.4.none)
## $call
## boral.default(y = communityB, family = "binomial", num.lv = 2,
## row.eff = "random", save.model = TRUE, mcmc.control = mcmc.control4)
##
## $coefficients
## coefficients
## cols beta0 theta1 theta2
## Cluster1003060 -0.061 1.885 0.000
## Cluster1003587 0.735 1.522 1.423
## Cluster1008496 -1.900 -5.033 -2.042
## Cluster1036508 -0.047 0.165 -0.940
## Cluster1036793 0.374 0.371 -0.960
## Cluster1043256 0.187 1.502 1.184
## Cluster104659 -1.599 -1.381 4.035
## Cluster1048454 -0.989 0.659 3.049
## Cluster105406 0.064 -0.649 -1.438
## Cluster1058962 -1.015 0.029 -4.471
## Cluster1082093 0.431 1.069 1.483
## Cluster1094626 -1.380 -2.515 2.730
## Cluster1111802 -1.071 -1.323 2.546
## Cluster1111808 -2.013 -1.072 -4.954
## Cluster1112567 -1.428 0.603 -3.935
## Cluster1115775 0.166 1.279 1.260
## Cluster1117056 0.205 -0.150 0.791
## Cluster1118816 -0.362 -1.143 1.256
## Cluster1120269 -0.152 1.950 1.203
## Cluster1123272 -0.362 4.552 0.066
## Cluster1143779 -0.081 -2.963 -1.791
## Cluster114519 0.045 0.133 -1.452
## Cluster1146018 -1.356 -2.201 -3.895
## Cluster1146858 -0.165 3.069 0.259
## Cluster1147021 -0.297 -2.032 -1.246
## Cluster1147433 0.000 2.918 0.926
## Cluster1151646 -0.431 -3.275 -1.399
## Cluster1154095 0.323 1.376 -0.713
## Cluster1160549 0.165 -2.361 0.024
## Cluster116476 -2.734 -2.942 4.000
## Cluster1183739 -0.130 -3.801 0.085
## Cluster1183836 0.422 0.998 -1.893
## Cluster1185877 -0.997 1.292 -5.130
## Cluster1189880 -1.433 0.366 -3.602
## Cluster1190248 -0.724 -4.689 0.809
## Cluster119182 -0.698 1.412 3.439
## Cluster1205684 0.359 -0.379 -1.213
## Cluster1206358 -2.208 1.815 -4.829
## Cluster1212246 -0.646 1.658 -2.522
## Cluster1218901 -1.008 -1.093 2.694
## Cluster1219908 -0.609 -0.264 2.778
## Cluster1228370 -1.383 -4.248 -1.807
## Cluster1230530 -1.688 4.845 -0.284
## Cluster1236621 -1.212 4.119 1.003
## Cluster1244964 -0.670 -2.060 -1.375
## Cluster1248054 0.252 -0.057 0.503
## Cluster1251081 -1.095 -1.363 2.553
## Cluster1252116 -0.338 -0.418 2.378
## Cluster1262671 0.910 0.451 -1.257
## Cluster1263180 -1.167 4.409 -2.821
## Cluster1264742 0.863 -0.891 1.839
## Cluster1270869 0.374 1.666 -1.118
## Cluster1271357 -1.459 2.180 -4.986
## Cluster1322 -0.714 -6.581 0.041
## Cluster141583 -0.002 -0.060 -1.233
## Cluster144694 -0.334 0.958 -1.025
## Cluster148932 -0.852 2.053 -3.167
## Cluster159898 -0.318 -1.530 0.793
## Cluster161999 -2.575 -0.003 4.710
## Cluster162246 -0.701 2.799 -1.427
## Cluster162949 -2.692 -2.387 4.103
## Cluster168999 -0.665 -1.197 -1.917
## Cluster169255 -0.979 -3.805 -2.020
## Cluster170040 -0.460 0.673 -2.174
## Cluster190091 0.213 -1.255 2.371
## Cluster193554 -1.956 -0.386 4.298
## Cluster193702 -0.282 3.060 1.263
## Cluster199147 -1.620 -5.293 -4.644
## Cluster207055 -0.301 -2.077 -1.182
## Cluster209030 -0.880 4.158 0.961
## Cluster21179 -0.053 -0.853 -1.193
## Cluster214234 -0.001 1.607 -1.252
## Cluster216019 -0.376 2.486 0.191
## Cluster22396 -0.791 2.282 -2.491
## Cluster230928 -0.114 -2.971 5.967
## Cluster236674 -0.158 -0.551 0.191
## Cluster238319 -0.244 -2.501 0.758
## Cluster238661 -1.238 -4.727 -0.056
## Cluster248444 -0.374 -1.145 1.340
## Cluster248703 -0.194 -0.131 -0.478
## Cluster248717 0.077 -1.495 -1.264
## Cluster251185 -0.249 -1.294 1.002
## Cluster255441 0.130 -0.506 -0.154
## Cluster256405 1.421 2.134 0.046
## Cluster258891 -1.975 2.307 -3.986
## Cluster260416 -0.690 -1.566 2.553
## Cluster264960 -1.335 -0.835 3.119
## Cluster268911 -0.471 1.784 3.262
## Cluster27114 -1.055 3.794 1.935
## Cluster278879 -0.787 3.347 -0.550
## Cluster279779 0.110 -0.103 -1.248
## Cluster283921 1.513 -1.343 -2.090
## Cluster292040 -0.295 -1.627 -0.437
## Cluster293462 -0.343 -1.840 0.707
## Cluster297322 -0.568 -2.599 0.513
## Cluster297807 -0.643 -0.371 3.189
## Cluster300625 -0.397 3.227 -0.514
## Cluster304037 -0.984 4.695 -1.301
## Cluster307655 -0.257 0.628 -0.884
## Cluster311923 -0.003 -1.393 1.745
## Cluster313551 -0.344 0.497 -2.877
## Cluster317158 -2.026 2.193 -5.545
## Cluster317705 -0.253 -1.266 -0.918
## Cluster3190 -1.222 3.930 2.096
## Cluster319598 -2.019 -4.231 -4.095
## Cluster323177 -0.417 0.262 -2.155
## Cluster328858 0.840 3.326 0.622
## Cluster331968 0.567 0.068 0.059
## Cluster337634 -1.685 4.739 -0.326
## Cluster346275 -0.207 0.440 -0.942
## Cluster346941 0.709 -1.667 2.071
## Cluster34774 -0.395 0.551 -2.129
## Cluster349803 -0.173 -1.059 0.466
## Cluster356106 0.336 1.189 1.276
## Cluster356886 -0.530 0.801 -1.640
## Cluster363044 -2.313 5.354 1.586
## Cluster366435 -0.503 2.985 1.262
## Cluster374617 -0.200 0.557 -1.657
## Cluster375655 -0.495 -1.689 -1.129
## Cluster382546 -0.455 -1.515 -0.839
## Cluster392350 -1.386 4.504 0.822
## Cluster405715 -0.385 -1.332 -0.808
## Cluster405735 -0.146 -1.639 -1.036
## Cluster409890 0.010 1.246 3.409
## Cluster411007 -0.084 1.629 1.255
## Cluster413866 -0.530 -1.456 1.682
## Cluster418132 -0.821 -4.570 -0.215
## Cluster418592 -2.445 -1.359 3.963
## Cluster421090 -0.800 -0.746 -3.071
## Cluster422387 -1.165 -4.375 -0.259
## Cluster425852 -0.730 -0.519 -2.370
## Cluster426879 -0.381 0.675 -2.579
## Cluster434789 -0.069 1.736 0.389
## Cluster436377 -0.090 1.176 1.571
## Cluster442017 0.259 -1.052 1.100
## Cluster442199 -2.687 -2.868 4.011
## Cluster446078 -0.374 -1.906 2.171
## Cluster449561 0.862 -0.104 0.793
## Cluster451245 0.647 -2.540 2.056
## Cluster45771 -2.219 1.142 -6.062
## Cluster458565 -0.472 -1.655 1.344
## Cluster465625 -2.328 0.566 4.246
## Cluster466805 -2.826 -0.227 6.347
## Cluster472574 0.443 -3.139 0.310
## Cluster474866 -0.480 -2.208 1.713
## Cluster477205 -0.877 1.412 -2.266
## Cluster47848 -0.211 -0.710 -0.885
## Cluster487185 0.322 -0.755 0.861
## Cluster4912 -0.680 2.889 -1.189
## Cluster506593 -1.979 6.289 -0.208
## Cluster507669 -2.474 1.809 -4.792
## Cluster510789 -0.214 -2.075 1.869
## Cluster511143 -1.172 -0.222 -4.791
## Cluster511497 -1.199 3.925 2.175
## Cluster512825 -0.475 0.571 -1.865
## Cluster515093 0.162 -3.041 -2.761
## Cluster515604 0.363 -0.305 -1.363
## Cluster517616 -0.146 -2.120 -1.157
## Cluster528954 -0.663 -2.779 -0.904
## Cluster53598 -0.375 2.502 1.303
## Cluster538054 0.136 0.408 1.460
## Cluster549169 -0.346 1.971 2.598
## Cluster549198 -2.544 0.809 5.321
## Cluster549885 -3.064 -0.265 6.716
## Cluster549932 -2.033 2.360 -4.071
## Cluster550517 -0.258 2.339 0.715
## Cluster551260 -0.282 2.365 0.760
## Cluster554866 0.308 -3.394 0.334
## Cluster560910 0.012 0.479 -1.207
## Cluster562057 -0.594 -2.004 1.591
## Cluster566878 0.668 0.115 1.805
## Cluster56812 -1.973 6.254 -0.260
## Cluster568976 -1.221 -0.086 4.117
## Cluster571733 -0.034 -1.522 0.723
## Cluster571940 -0.710 -1.420 2.834
## Cluster582971 -3.053 0.047 6.537
## Cluster588978 -1.992 2.225 -4.041
## Cluster599965 0.100 -0.185 -0.832
## Cluster604765 0.142 1.170 -1.519
## Cluster605186 -1.907 -2.283 -4.359
## Cluster608280 0.572 0.802 0.262
## Cluster624167 -0.217 -0.393 -1.034
## Cluster62672 -0.081 -0.094 0.248
## Cluster628263 -0.060 1.716 0.530
## Cluster629282 -0.002 0.367 0.006
## Cluster633345 -2.888 -0.061 6.366
## Cluster635168 -0.387 1.766 -0.797
## Cluster636211 -0.024 0.326 -1.598
## Cluster636975 2.153 0.868 0.590
## Cluster637214 -2.705 0.470 -6.147
## Cluster638434 -0.241 0.745 -2.146
## Cluster64231 -0.033 -0.156 -1.209
## Cluster648953 -0.860 -2.241 -2.959
## Cluster657934 -0.130 2.084 0.301
## Cluster662085 -2.304 5.506 -0.014
## Cluster667421 -0.391 -0.667 -1.365
## Cluster675458 -0.163 -0.066 0.472
## Cluster677759 -0.360 -0.015 -2.707
## Cluster680855 -0.712 -2.445 2.285
## Cluster681238 -2.005 0.228 3.858
## Cluster685794 0.604 0.272 -1.256
## Cluster689312 -0.460 -1.613 -0.984
## Cluster691418 -0.470 -1.734 -0.868
## Cluster691869 -0.239 0.201 -0.713
## Cluster695513 -0.441 1.702 -0.987
## Cluster697682 -0.618 1.685 -1.857
## Cluster714500 -0.318 1.451 -0.841
## Cluster7237 -0.164 -1.982 1.770
## Cluster731000 -2.665 -2.554 3.986
## Cluster738928 -0.417 1.859 -0.846
## Cluster742248 -1.079 -2.096 5.959
## Cluster746906 -1.546 1.123 -3.643
## Cluster750082 0.108 -0.109 -2.120
## Cluster752397 0.115 -2.332 0.684
## Cluster759071 -3.003 0.076 6.550
## Cluster761967 -2.085 2.527 -4.089
## Cluster762239 -1.762 -2.962 -4.301
## Cluster774173 -0.222 -1.775 -1.650
## Cluster779302 -3.154 0.096 -6.742
## Cluster794054 -2.978 -4.022 -4.930
## Cluster802315 0.429 -0.096 -0.117
## Cluster815919 -1.672 6.319 0.634
## Cluster816928 -0.237 1.052 -1.109
## Cluster827049 -1.877 6.119 -0.227
## Cluster827888 -3.094 -0.181 6.534
## Cluster831241 -0.510 -1.642 -1.110
## Cluster852624 -0.474 0.859 -1.410
## Cluster852926 -1.765 -5.920 -1.762
## Cluster866046 -0.179 -1.297 -1.998
## Cluster866530 1.663 -0.316 1.043
## Cluster868323 -0.740 -1.858 1.991
## Cluster870147 -1.725 3.549 3.644
## Cluster871445 -1.210 -1.118 2.941
## Cluster872255 -1.135 -1.182 2.755
## Cluster8795 -0.625 -2.822 0.608
## Cluster881515 -0.467 -0.681 -1.689
## Cluster885812 -0.682 -0.959 2.782
## Cluster892866 -2.665 -1.269 4.167
## Cluster893883 -0.437 0.503 -2.791
## Cluster896261 -0.686 1.950 2.818
## Cluster89960 -0.934 0.550 -2.760
## Cluster899998 -1.201 0.645 -3.251
## Cluster91278 -0.362 -3.417 -0.664
## Cluster91284 -0.064 -0.437 -1.717
## Cluster914193 -0.155 1.605 -0.907
## Cluster932946 -0.405 0.685 -1.328
## Cluster933654 -0.022 0.021 0.546
## Cluster933936 -0.514 -0.771 -1.751
## Cluster946116 -1.784 -5.195 -1.346
## Cluster95070 -2.444 0.740 4.419
## Cluster953387 -0.462 -1.346 3.449
## Cluster955298 -1.242 2.989 3.287
## Cluster956580 -0.530 1.306 -1.300
## Cluster956910 -0.204 -1.294 0.616
## Cluster957273 -2.138 5.399 -0.254
## Cluster957758 0.332 -1.692 -1.553
## Cluster96058 -0.712 3.482 1.358
## Cluster964340 -0.458 1.689 -1.071
## Cluster970338 -0.447 -1.801 -1.176
## Cluster970757 -0.080 -1.838 0.025
## Cluster975075 -0.038 -1.631 -1.435
## Cluster975205 -0.331 0.262 -1.040
## Cluster982015 -1.372 -3.560 -4.061
## Cluster987637 -1.604 -2.367 3.056
## Cluster991103 -0.037 -0.804 -1.295
## Cluster995806 -0.722 -2.254 1.626
## Cluster99619 -0.479 -1.620 2.062
##
## $lvs
## lv
## rows lv1 lv2
## 1 0.039 -0.853
## 2 0.078 -0.940
## 3 0.210 -0.675
## 4 -0.293 -1.126
## 5 0.245 -0.806
## 6 0.529 -0.884
## 7 0.183 -0.680
## 8 0.367 0.413
## 9 -0.238 0.346
## 10 -0.199 0.259
## 11 -0.282 0.353
## 12 -0.207 0.306
## 13 -0.324 0.454
## 14 -0.148 0.596
## 15 -0.262 0.509
## 16 -0.225 0.615
## 17 0.019 1.181
## 18 -0.064 1.147
## 19 -0.308 1.257
## 20 -0.253 1.425
## 21 -0.565 1.272
## 22 0.050 1.305
## 23 0.469 0.177
## 24 0.215 0.089
## 25 0.311 0.253
## 26 1.205 0.051
## 27 0.668 0.085
## 28 0.812 -0.090
## 29 0.896 -0.066
## 30 -0.443 -0.345
## 31 -0.258 -0.345
## 32 -0.147 -0.212
## 33 -0.257 -0.120
## 34 -0.083 -0.330
## 35 -0.040 -0.226
## 36 -1.137 -0.698
## 37 -0.627 -0.462
## 38 -0.452 -0.103
## 39 -0.301 -0.330
## 40 0.112 -0.241
## 41 0.121 -0.301
## 42 0.070 -0.435
## 43 0.095 -0.440
## 44 0.167 -0.648
## 45 -0.018 -0.470
## 46 0.078 -0.169
## 47 0.001 -0.418
## 48 -0.119 -0.172
## 49 0.130 -0.346
## 50 0.146 -0.213
## 51 -0.166 -0.208
## 52 0.302 -0.205
## 53 0.220 -0.290
## 54 0.170 -0.399
## 55 0.071 -0.312
## 56 -0.287 -0.262
## 57 -0.204 -0.180
## 58 -0.177 -0.221
## 59 -0.274 -0.331
## 60 0.619 0.315
## 61 0.765 0.404
##
## $row.coefficients
## $row.coefficients[[1]]
## 1 2 3 4 5 6 7 8 9 10
## -3.002 -3.852 -2.339 -4.652 -3.675 -3.294 -2.448 -2.950 -1.478 -1.186
## 11 12 13 14 15 16 17 18 19 20
## -1.559 -1.487 -1.905 -2.602 -1.937 -2.177 -3.750 -3.959 -3.839 -4.352
## 21 22 23 24 25 26 27 28 29 30
## -4.520 -4.166 -2.885 -2.272 -2.172 -5.011 -2.516 -2.920 -3.676 -2.807
## 31 32 33 34 35 36 37 38 39 40
## -2.032 -2.452 -2.474 -2.509 -1.637 -4.669 -3.484 -2.010 -2.333 -1.881
## 41 42 43 44 45 46 47 48 49 50
## -1.934 -2.014 -1.946 -2.649 -1.863 -1.250 -2.146 -2.255 -2.471 -2.357
## 51 52 53 54 55 56 57 58 59 60
## -2.299 -1.582 -1.336 -1.327 -1.690 -1.436 -1.275 -1.383 -1.856 -2.771
## 61
## -3.637
##
##
## $est
## [1] "median"
##
## $calc.ics
## [1] FALSE
##
## $trial.size
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [71] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [106] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [141] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [176] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [211] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [246] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##
## $num.ord.levels
## [1] 0
##
## $prior.control
## $prior.control$ssvs.index
## [1] -1
##
##
## attr(,"class")
## [1] "summary.boral"
par(mfrow = c(2,2))
# Residual diagnostics for the boral fit (4 panels)
plot(commB.fit.b3.4.none) ## Plots used in residual analysis,
## NULL
## Warning in qnorm(get_mus[, j] + 1e-05): NaNs produced
par(mfrow = c(1,1))
# Model-based ordination: plot the two latent variables coloured by habitat, and
# keep the scaled LV scores (return.vals = TRUE) for the correlation tests below
commB.fit.b3.4.none.ord <- lvsplot(commB.fit.b3.4.none, biplot=FALSE, col = as.numeric(habitatN$Habitat), return.vals = TRUE)
res.cors <- get.residual.cor(commB.fit.b3.4.none); beep(1) # residual deviation
res.cors$trace
## [1] 4419.197
# 4419.197
# habitatN$Elevation_m, y = commB.fit.b3.4.none$lv.iqr[,1]
# Does latent variable 1 track elevation?
# NOTE(review): assumes lv.iqr[,1] holds the per-site LV1 point summaries —
# confirm against the boral documentation
cor.test(habitatN$Elevation_m, y = commB.fit.b3.4.none$lv.iqr[,1], method = "pearson")
##
## Pearson's product-moment correlation
##
## data: habitatN$Elevation_m and commB.fit.b3.4.none$lv.iqr[, 1]
## t = -3.9509, df = 59, p-value = 0.0002106
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.6359671 -0.2323392
## sample estimates:
## cor
## -0.4573986
# t = -3.9509, df = 59, p-value = 0.0002106
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval: -0.6359671 -0.2323392
# sample estimates: cor -0.4573986
# Rank-based (Kendall) check of the LV1 ~ elevation association, robust to non-linearity
cor.test(habitatN$Elevation_m, y = commB.fit.b3.4.none$lv.iqr[,1], method = "kendall")
##
## Kendall's rank correlation tau
##
## data: habitatN$Elevation_m and commB.fit.b3.4.none$lv.iqr[, 1]
## z = -4.0139, p-value = 5.973e-05
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau
## -0.3525554
# Spearman alternative for LV1 ~ elevation; the warning below shows ties prevent an exact p-value
cor.test(habitatN$Elevation_m, y = commB.fit.b3.4.none$lv.iqr[,1], method = "spearman")
## Warning in cor.test.default(habitatN$Elevation_m, y =
## commB.fit.b3.4.none$lv.iqr[, : Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: habitatN$Elevation_m and commB.fit.b3.4.none$lv.iqr[, 1]
## S = 56743, p-value = 4.022e-05
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.5003503
# S = 56743, p-value = 4.022e-05
# alternative hypothesis: true rho is not equal to 0
# sample estimates: rho -0.5003503
# Same three tests, now for the second latent variable vs elevation
cor.test(habitatN$Elevation_m, y = commB.fit.b3.4.none$lv.iqr[,2], method = "pearson")
##
## Pearson's product-moment correlation
##
## data: habitatN$Elevation_m and commB.fit.b3.4.none$lv.iqr[, 2]
## t = -5.0463, df = 59, p-value = 4.603e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.7036107 -0.3449532
## sample estimates:
## cor
## -0.5490776
# t = -5.0463, df = 59, p-value = 4.603e-06
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval: -0.7036107 -0.3449532
#sample estimates: cor -0.5490776
# Kendall rank correlation for LV2 ~ elevation
cor.test(habitatN$Elevation_m, y = commB.fit.b3.4.none$lv.iqr[,2], method = "kendall")
##
## Kendall's rank correlation tau
##
## data: habitatN$Elevation_m and commB.fit.b3.4.none$lv.iqr[, 2]
## z = -4.3997, p-value = 1.084e-05
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau
## -0.3864444
# Spearman rank correlation for LV2 ~ elevation; ties again prevent an exact p-value (warning below)
cor.test(habitatN$Elevation_m, y = commB.fit.b3.4.none$lv.iqr[,2], method = "spearman")
## Warning in cor.test.default(habitatN$Elevation_m, y =
## commB.fit.b3.4.none$lv.iqr[, : Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: habitatN$Elevation_m and commB.fit.b3.4.none$lv.iqr[, 2]
## S = 59369, p-value = 1.645e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.5697854
# S = 59369, p-value = 1.645e-06
# alternative hypothesis: true rho is not equal to 0
#sample estimates: rho -0.5697854
Try to interpret the latent variables. The latent variables are clearly correlated with the forest types (habitatN$Habitat) but at best weakly correlated with Simpson_evenness_index, Elevation_m, and weather_value. There does seem to be a quadratic relationship between lv2 (the second latent variable) and Elevation_m. The correlation plot suggests that there are some informative Landsat channels.
# Side-by-side view of the scaled LV scores and the numeric habitat code,
# to eyeball how sites cluster by forest type
cbind(commB.fit.b3.4.none.ord$scaled.lvs,habitatN$Habitat)
## [,1] [,2] [,3]
## [1,] -2.44609743 0.14120912 1
## [2,] -2.71739500 0.02594075 1
## [3,] -1.95262004 -0.50982462 1
## [4,] -3.18536197 1.38226122 1
## [5,] -2.35547004 -0.59986978 1
## [6,] -2.66528779 -1.57655378 1
## [7,] -1.96043686 -0.41521070 1
## [8,] 1.29233240 -1.35877897 2
## [9,] 1.24502413 0.79077191 2
## [10,] 0.97233331 0.67588529 2
## [11,] 1.27739755 0.94241330 2
## [12,] 1.11631926 0.69251491 2
## [13,] 1.59319358 1.06383239 2
## [14,] 1.97665578 0.40647208 2
## [15,] 1.74210233 0.83076662 2
## [16,] 2.05490833 0.66967150 2
## [17,] 3.70022098 -0.34327830 2
## [18,] 3.62047349 -0.03867699 2
## [19,] 4.01380983 0.79084955 2
## [20,] 4.50639852 0.54992883 2
## [21,] 4.12465736 1.68866025 2
## [22,] 4.06900242 -0.48593482 2
## [23,] 0.55520084 -1.65516835 3
## [24,] 0.35413600 -0.73561800 3
## [25,] 0.82499633 -1.12001204 3
## [26,] -0.01387935 -4.21208784 3
## [27,] 0.22575056 -2.32990136 3
## [28,] -0.33700670 -2.78820551 3
## [29,] -0.28875985 -3.09199085 3
## [30,] -0.78772274 1.70052382 4
## [31,] -0.83774256 1.04730697 4
## [32,] -0.46196299 0.62200003 4
## [33,] -0.15737333 0.98320665 4
## [34,] -0.83530198 0.42632406 4
## [35,] -0.53292512 0.24800321 4
## [36,] -1.67801691 4.24030441 4
## [37,] -1.09485419 2.38076134 4
## [38,] -0.05688640 1.66663045 4
## [39,] -0.78108896 1.19630589 4
## [40,] -0.61668994 -0.28198669 5
## [41,] -0.79990450 -0.29730090 5
## [42,] -1.19224050 -0.08209765 5
## [43,] -1.21216052 -0.16997671 5
## [44,] -1.85802182 -0.36629931 5
## [45,] -1.27467902 0.23566789 5
## [46,] -0.38940061 -0.18206862 5
## [47,] -1.12414642 0.15539083 5
## [48,] -0.35005590 0.51321837 5
## [49,] -0.93712748 -0.31671391 6
## [50,] -0.53978252 -0.41119495 6
## [51,] -0.44502825 0.68739927 6
## [52,] -0.55507552 -0.96147362 6
## [53,] -0.79114685 -0.65087306 6
## [54,] -1.10919122 -0.44640189 6
## [55,] -0.81955229 -0.11903023 6
## [56,] -0.57857530 1.12806395 6
## [57,] -0.35134575 0.81193754 6
## [58,] -0.48287375 0.72932460 6
## [59,] -0.78927873 1.09929112 6
## [60,] 0.93308104 -2.21828393 6
## [61,] 1.16447301 -2.75802475 6
par(mfrow = c(1,2))
# Plot each LV against habitat and the candidate environmental covariates,
# two panels at a time
plot(commB.fit.b3.4.none.ord$scaled.lvs[,1]~habitatN$Habitat)
plot(commB.fit.b3.4.none.ord$scaled.lvs[,2]~habitatN$Habitat)
plot(commB.fit.b3.4.none.ord$scaled.lvs[,1]~habitatN$Simpson_evenness_index)
plot(commB.fit.b3.4.none.ord$scaled.lvs[,2]~habitatN$Simpson_evenness_index)
plot(commB.fit.b3.4.none.ord$scaled.lvs[,1]~habitatN$Elevation_m)
plot(commB.fit.b3.4.none.ord$scaled.lvs[,2]~habitatN$Elevation_m)
plot(commB.fit.b3.4.none.ord$scaled.lvs[,1]~habitatN$weather_value)
plot(commB.fit.b3.4.none.ord$scaled.lvs[,2]~habitatN$weather_value)
# correlation matrix of 2 latent variables (1, 2) with all the Landsat channels. Look only at the two left columns
lvsmatrix <- habitatN %>% dplyr::select(starts_with("x"))
lvsmatrix <- cbind(commB.fit.b3.4.none.ord$scaled.lvs, lvsmatrix)
lvsmatrix_cor <- cor(lvsmatrix)
par(mfrow = c(1,1))
# NOTE(review): in corrplot, sig.level and insig only take effect when a p-value
# matrix is supplied via p.mat; as written no cells are blanked — confirm intent
corrplot(lvsmatrix_cor, type = "lower", tl.pos = "l", tl.cex = 0.5, sig.level = 0.05, insig = "blank")
Testing whether I can combine levels within habitatN$Habitat. I use mvabund to test whether MF and NF are significantly different from each other and from the other habitats: BB, EC, JC, and CL. I do this by creating two models: one with all 6 levels, and one in which NF (or MF) is collapsed with one of the other habitats (or with each other). I run mvabund on both models and use anova to compare them. If the models differ significantly, then the two collapsed habitat types are significantly different. Finally, I conservatively adjust the p-values for all 15 possible pairwise comparisons between the 6 habitat types ((6 × 5)/2 = 15).
anova.manyglm options: test = “score” — the anova.manyglm help file says that test = “wald” is poor for binomial data under some conditions, so “score” is the better alternative. cor.type = “shrink” — estimates correlations between species in an efficient (shrinkage) way, which is necessary for our kind of dataset, where there are many more species than samples.
# communityB <- communityB[ , 1:150] # comment away if i want to use the whole dataset. this is to produce a partial dataset for debugging
communityB.mvb <- mvabund(communityB)
#nboot <- 499 # set to 20 for debugging (~25 mins for nboot = 499)
nboot <- 999 # set to 999 for publication
# base model with no levels combined
mod_BBCLECJCMFNF.mvb <- manyglm(communityB.mvb ~ Habitat, data = habitatN, family = binomial("cloglog"))
# residual diagnostics for the manyglm fit
plot(mod_BBCLECJCMFNF.mvb)
anova(mod_BBCLECJCMFNF.mvb, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot) # p = 0.001, df = 5, nBoot = 999, 20 mins. The Habitat predictor has a sig effect. NOTE(review): the originally noted score (534.8) does not match the displayed output below (538.5) — confirm which run/dataset that note came from
## Resampling begins for test 1.
## Resampling run 0 finished. Time elapsed: 0.01 minutes...
## Resampling run 100 finished. Time elapsed: 1.46 minutes...
## Resampling run 200 finished. Time elapsed: 2.87 minutes...
## Resampling run 300 finished. Time elapsed: 4.30 minutes...
## Resampling run 400 finished. Time elapsed: 5.72 minutes...
## Resampling run 500 finished. Time elapsed: 7.14 minutes...
## Resampling run 600 finished. Time elapsed: 8.59 minutes...
## Resampling run 700 finished. Time elapsed: 10.01 minutes...
## Resampling run 800 finished. Time elapsed: 11.44 minutes...
## Resampling run 900 finished. Time elapsed: 12.86 minutes...
## Time elapsed: 0 hr 14 min 15 sec
## Analysis of Variance Table
##
## Model: manyglm(formula = communityB.mvb ~ Habitat, family = binomial("cloglog"),
## Model: data = habitatN)
##
## Multivariate test:
## Res.Df Df.diff score Pr(>score)
## (Intercept) 60
## Habitat 55 5 538.5 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
## Test statistics calculated assuming correlated response via ridge regularization
## P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# combine NF and MF
habitatN$HabitatMFNF <- fct_collapse(habitatN$Habitat, MFNF = c("MF", "NF"))
fct_count(habitatN$HabitatMFNF)
## # A tibble: 5 x 2
## f n
## <fct> <int>
## 1 BB 7
## 2 CL 15
## 3 EC 7
## 4 JC 10
## 5 MFNF 22
# Reduced model (MF and NF pooled); compared to the full 6-level model by anova below
mod_CLBBECJC_MFNF.mvb <- manyglm(communityB.mvb ~ HabitatMFNF, data = habitatN, family = binomial("cloglog"))
plot(mod_CLBBECJC_MFNF.mvb)
anova(mod_BBCLECJCMFNF.mvb, mod_CLBBECJC_MFNF.mvb, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot) # df = 1, nBoot = 999. MF and NF are sig diff. NOTE(review): the originally noted values (p = 0.001, score = 55.42) do not match the displayed output below (p = 0.009, score = 57.34) — confirm which run is reported; the same question affects the first p-value fed into the FDR correction at the end
## Resampling begins for test 1.
## Resampling run 0 finished. Time elapsed: 0.01 minutes...
## Resampling run 100 finished. Time elapsed: 1.51 minutes...
## Resampling run 200 finished. Time elapsed: 3.02 minutes...
## Resampling run 300 finished. Time elapsed: 4.52 minutes...
## Resampling run 400 finished. Time elapsed: 6.06 minutes...
## Resampling run 500 finished. Time elapsed: 7.53 minutes...
## Resampling run 600 finished. Time elapsed: 8.99 minutes...
## Resampling run 700 finished. Time elapsed: 10.51 minutes...
## Resampling run 800 finished. Time elapsed: 12.08 minutes...
## Resampling run 900 finished. Time elapsed: 13.62 minutes...
## Time elapsed: 0 hr 15 min 8 sec
## Analysis of Variance Table
##
## mod_CLBBECJC_MFNF.mvb: communityB.mvb ~ HabitatMFNF
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
##
## Multivariate test:
## Res.Df Df.diff score Pr(>score)
## mod_CLBBECJC_MFNF.mvb 56
## mod_BBCLECJCMFNF.mvb 55 1 57.34 0.009 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
## Test statistics calculated assuming correlated response via ridge regularization
## P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# combine NF and BB
habitatN$HabitatNFBB <- fct_collapse(habitatN$Habitat, NFBB = c("NF", "BB"))
fct_count(habitatN$HabitatNFBB)
## # A tibble: 5 x 2
## f n
## <fct> <int>
## 1 NFBB 20
## 2 CL 15
## 3 EC 7
## 4 JC 10
## 5 MF 9
# Reduced model (NF and BB pooled) vs the full 6-level model
mod_CLECJCMF_NFBB.mvb <- manyglm(communityB.mvb ~ HabitatNFBB, data = habitatN, family = binomial("cloglog"))
plot(mod_CLECJCMF_NFBB.mvb)
anova(mod_BBCLECJCMFNF.mvb, mod_CLECJCMF_NFBB.mvb, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot) # p = 0.001, df = 1, score= 54.88, nBoot = 999, 20 mins. BB and NF are sig diff (values match the displayed output below)
## Resampling begins for test 1.
## Resampling run 0 finished. Time elapsed: 0.02 minutes...
## Resampling run 100 finished. Time elapsed: 1.55 minutes...
## Resampling run 200 finished. Time elapsed: 3.11 minutes...
## Resampling run 300 finished. Time elapsed: 4.65 minutes...
## Resampling run 400 finished. Time elapsed: 6.18 minutes...
## Resampling run 500 finished. Time elapsed: 7.71 minutes...
## Resampling run 600 finished. Time elapsed: 9.24 minutes...
## Resampling run 700 finished. Time elapsed: 10.76 minutes...
## Resampling run 800 finished. Time elapsed: 12.30 minutes...
## Resampling run 900 finished. Time elapsed: 13.82 minutes...
## Time elapsed: 0 hr 15 min 20 sec
## Analysis of Variance Table
##
## mod_CLECJCMF_NFBB.mvb: communityB.mvb ~ HabitatNFBB
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
##
## Multivariate test:
## Res.Df Df.diff score Pr(>score)
## mod_CLECJCMF_NFBB.mvb 56
## mod_BBCLECJCMFNF.mvb 55 1 54.88 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
## Test statistics calculated assuming correlated response via ridge regularization
## P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# combine NF and JC
habitatN$HabitatNFJC <- fct_collapse(habitatN$Habitat, NFJC = c("NF", "JC"))
fct_count(habitatN$HabitatNFJC)
## # A tibble: 5 x 2
## f n
## <fct> <int>
## 1 BB 7
## 2 CL 15
## 3 EC 7
## 4 NFJC 23
## 5 MF 9
# Reduced model (NF and JC pooled) vs the full 6-level model
mod_CLECBBMF_NFJC.mvb <- manyglm(communityB.mvb ~ HabitatNFJC, data = habitatN, family = binomial("cloglog"))
plot(mod_CLECBBMF_NFJC.mvb)
anova(mod_BBCLECJCMFNF.mvb, mod_CLECBBMF_NFJC.mvb, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot) # p = 0.001, df = 1, score= 58.09, nBoot = 999, 20 mins. JC and NF are sig diff (values match the displayed output below)
## Resampling begins for test 1.
## Resampling run 0 finished. Time elapsed: 0.02 minutes...
## Resampling run 100 finished. Time elapsed: 1.55 minutes...
## Resampling run 200 finished. Time elapsed: 3.07 minutes...
## Resampling run 300 finished. Time elapsed: 4.60 minutes...
## Resampling run 400 finished. Time elapsed: 6.14 minutes...
## Resampling run 500 finished. Time elapsed: 7.66 minutes...
## Resampling run 600 finished. Time elapsed: 9.19 minutes...
## Resampling run 700 finished. Time elapsed: 10.71 minutes...
## Resampling run 800 finished. Time elapsed: 12.24 minutes...
## Resampling run 900 finished. Time elapsed: 13.77 minutes...
## Time elapsed: 0 hr 15 min 17 sec
## Analysis of Variance Table
##
## mod_CLECBBMF_NFJC.mvb: communityB.mvb ~ HabitatNFJC
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
##
## Multivariate test:
## Res.Df Df.diff score Pr(>score)
## mod_CLECBBMF_NFJC.mvb 56
## mod_BBCLECJCMFNF.mvb 55 1 58.09 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
## Test statistics calculated assuming correlated response via ridge regularization
## P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# combine NF and EC
habitatN$HabitatNFEC <- fct_collapse(habitatN$Habitat, NFEC = c("NF", "EC"))
fct_count(habitatN$HabitatNFEC)
## # A tibble: 5 x 2
## f n
## <fct> <int>
## 1 BB 7
## 2 CL 15
## 3 NFEC 20
## 4 JC 10
## 5 MF 9
# Reduced model (NF and EC pooled) vs the full 6-level model
mod_CLBBJCMF_NFEC.mvb <- manyglm(communityB.mvb ~ HabitatNFEC, data = habitatN, family = binomial("cloglog"))
plot(mod_CLBBJCMF_NFEC.mvb)
anova(mod_BBCLECJCMFNF.mvb, mod_CLBBJCMF_NFEC.mvb, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot) # p = 0.003, df = 1, nBoot = 999, 20 mins. EC and NF are sig diff. NOTE(review): the originally noted score (54.41) does not match the displayed output below (50.63) — confirm which run is reported
## Resampling begins for test 1.
## Resampling run 0 finished. Time elapsed: 0.02 minutes...
## Resampling run 100 finished. Time elapsed: 1.54 minutes...
## Resampling run 200 finished. Time elapsed: 3.07 minutes...
## Resampling run 300 finished. Time elapsed: 4.59 minutes...
## Resampling run 400 finished. Time elapsed: 6.11 minutes...
## Resampling run 500 finished. Time elapsed: 7.64 minutes...
## Resampling run 600 finished. Time elapsed: 9.16 minutes...
## Resampling run 700 finished. Time elapsed: 10.69 minutes...
## Resampling run 800 finished. Time elapsed: 12.22 minutes...
## Resampling run 900 finished. Time elapsed: 13.74 minutes...
## Time elapsed: 0 hr 15 min 15 sec
## Analysis of Variance Table
##
## mod_CLBBJCMF_NFEC.mvb: communityB.mvb ~ HabitatNFEC
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
##
## Multivariate test:
## Res.Df Df.diff score Pr(>score)
## mod_CLBBJCMF_NFEC.mvb 56
## mod_BBCLECJCMFNF.mvb 55 1 50.63 0.003 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
## Test statistics calculated assuming correlated response via ridge regularization
## P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# combine NF and CL
habitatN$HabitatNFCL <- fct_collapse(habitatN$Habitat, NFCL = c("NF", "CL"))
fct_count(habitatN$HabitatNFCL)
## # A tibble: 5 x 2
## f n
## <fct> <int>
## 1 BB 7
## 2 NFCL 28
## 3 EC 7
## 4 JC 10
## 5 MF 9
# Reduced model (NF and CL pooled) vs the full 6-level model
mod_ECBBJCMF_NFCL.mvb <- manyglm(communityB.mvb ~ HabitatNFCL, data = habitatN, family = binomial("cloglog"))
plot(mod_ECBBJCMF_NFCL.mvb)
anova(mod_BBCLECJCMFNF.mvb, mod_ECBBJCMF_NFCL.mvb, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot) # p = 0.001, df = 1, nBoot = 999, 20 mins. CL and NF are sig diff. NOTE(review): the originally noted score (64.25) does not match the displayed output below (77.19) — confirm which run is reported
## Resampling begins for test 1.
## Resampling run 0 finished. Time elapsed: 0.02 minutes...
## Resampling run 100 finished. Time elapsed: 1.53 minutes...
## Resampling run 200 finished. Time elapsed: 3.04 minutes...
## Resampling run 300 finished. Time elapsed: 4.56 minutes...
## Resampling run 400 finished. Time elapsed: 6.07 minutes...
## Resampling run 500 finished. Time elapsed: 7.58 minutes...
## Resampling run 600 finished. Time elapsed: 9.09 minutes...
## Resampling run 700 finished. Time elapsed: 10.60 minutes...
## Resampling run 800 finished. Time elapsed: 12.11 minutes...
## Resampling run 900 finished. Time elapsed: 13.62 minutes...
## Time elapsed: 0 hr 15 min 6 sec
## Analysis of Variance Table
##
## mod_ECBBJCMF_NFCL.mvb: communityB.mvb ~ HabitatNFCL
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
##
## Multivariate test:
## Res.Df Df.diff score Pr(>score)
## mod_ECBBJCMF_NFCL.mvb 56
## mod_BBCLECJCMFNF.mvb 55 1 77.19 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
## Test statistics calculated assuming correlated response via ridge regularization
## P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
## do a table-wide correction, under the assumption that there are 15 possible pairwise comparisons between all 6 forest types: (5*6)/2 = 15
# NOTE(review): n = length(pvalues) corrects over only the 5 tests actually run
# in this batch, not over all 15 possible pairwise comparisons stated above --
# confirm whether n should be 15 for a truly table-wide FDR correction.
# (The recorded output below corresponds to n = 5.)
pvalues <- c(0.001, .001, .001, .003, .001)
# Benjamini-Hochberg false-discovery-rate adjustment of the raw p-values.
pvalues.corr.fdr<-p.adjust(pvalues, method = "fdr", n = length(pvalues))
pvalues.corr.fdr
## [1] 0.00125 0.00125 0.00125 0.00300 0.00125
# 0.00125 0.00125 0.00125 0.00300 0.00125
# communityB <- communityB[ , 1:150] # comment away if i want to use the whole dataset. this is to produce a partial dataset for debugging
# Wrap the presence/absence community matrix as an mvabund object so that the
# manyglm()/anova() calls below treat each column (species/OTU) as a response.
communityB.mvb <- mvabund(communityB)
# nboot <- 499 # set to 20 for debugging (~ 20 mins for 499)
# Number of PIT-trap resampling iterations used by every anova() call below.
nboot <- 999 # set to 999 for publication
# base model with no levels combined: mod_BBCLECJCMFNF.mvb
# combine NF and MF: already done above
# Pool the MF and BB habitat levels into a single level ("MFBB") to test
# whether these two habitats can be merged without loss of fit.
habitatN[["HabitatMFBB"]] <- fct_collapse(habitatN[["Habitat"]], MFBB = c("MF", "BB"))
fct_count(habitatN[["HabitatMFBB"]])
## # A tibble: 5 x 2
## f n
## <fct> <int>
## 1 MFBB 16
## 2 CL 15
## 3 EC 7
## 4 JC 10
## 5 NF 13
# Reduced model: MF and BB share one coefficient; all other habitats stay separate.
mod_CLECJCNF_MFBB.mvb <- manyglm(
  communityB.mvb ~ HabitatMFBB,
  family = binomial("cloglog"),
  data = habitatN
)
# Residual diagnostics for the reduced model.
plot(mod_CLECJCNF_MFBB.mvb)
# Score test of the reduced model against the full 6-level model.
# p = 0.001, df = 1, score= 54.23, nBoot = 999, 20 mins. BB and MF are sig diff
anova(
  mod_BBCLECJCMFNF.mvb, mod_CLECJCNF_MFBB.mvb,
  nBoot = nboot,
  test = "score",
  cor.type = "shrink",
  show.time = "all"
)
## Resampling begins for test 1.
## Resampling run 0 finished. Time elapsed: 0.02 minutes...
## Resampling run 100 finished. Time elapsed: 1.53 minutes...
## Resampling run 200 finished. Time elapsed: 3.04 minutes...
## Resampling run 300 finished. Time elapsed: 4.55 minutes...
## Resampling run 400 finished. Time elapsed: 6.06 minutes...
## Resampling run 500 finished. Time elapsed: 7.57 minutes...
## Resampling run 600 finished. Time elapsed: 9.08 minutes...
## Resampling run 700 finished. Time elapsed: 10.59 minutes...
## Resampling run 800 finished. Time elapsed: 12.10 minutes...
## Resampling run 900 finished. Time elapsed: 13.61 minutes...
## Time elapsed: 0 hr 15 min 6 sec
## Analysis of Variance Table
##
## mod_CLECJCNF_MFBB.mvb: communityB.mvb ~ HabitatMFBB
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
##
## Multivariate test:
## Res.Df Df.diff score Pr(>score)
## mod_CLECJCNF_MFBB.mvb 56
## mod_BBCLECJCMFNF.mvb 55 1 52.97 0.002 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
## Test statistics calculated assuming correlated response via ridge regularization
## P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# Pool the MF and JC habitat levels into a single level ("MFJC") to test
# whether these two habitats can be merged without loss of fit.
habitatN[["HabitatMFJC"]] <- fct_collapse(habitatN[["Habitat"]], MFJC = c("MF", "JC"))
fct_count(habitatN[["HabitatMFJC"]])
## # A tibble: 5 x 2
## f n
## <fct> <int>
## 1 BB 7
## 2 CL 15
## 3 EC 7
## 4 MFJC 19
## 5 NF 13
# Reduced model: MF and JC share one coefficient; all other habitats stay separate.
mod_CLECBBNF_MFJC.mvb <- manyglm(
  communityB.mvb ~ HabitatMFJC,
  family = binomial("cloglog"),
  data = habitatN
)
# Residual diagnostics for the reduced model.
plot(mod_CLECBBNF_MFJC.mvb)
# Score test of the reduced model against the full 6-level model.
# p = 0.001, df = 1, score= 58.73, nBoot = 999, 20 mins. JC and MF are sig diff
anova(
  mod_BBCLECJCMFNF.mvb, mod_CLECBBNF_MFJC.mvb,
  nBoot = nboot,
  test = "score",
  cor.type = "shrink",
  show.time = "all"
)
## Resampling begins for test 1.
## Resampling run 0 finished. Time elapsed: 0.02 minutes...
## Resampling run 100 finished. Time elapsed: 1.53 minutes...
## Resampling run 200 finished. Time elapsed: 3.04 minutes...
## Resampling run 300 finished. Time elapsed: 4.55 minutes...
## Resampling run 400 finished. Time elapsed: 6.06 minutes...
## Resampling run 500 finished. Time elapsed: 7.58 minutes...
## Resampling run 600 finished. Time elapsed: 9.09 minutes...
## Resampling run 700 finished. Time elapsed: 10.60 minutes...
## Resampling run 800 finished. Time elapsed: 12.12 minutes...
## Resampling run 900 finished. Time elapsed: 13.63 minutes...
## Time elapsed: 0 hr 15 min 7 sec
## Analysis of Variance Table
##
## mod_CLECBBNF_MFJC.mvb: communityB.mvb ~ HabitatMFJC
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
##
## Multivariate test:
## Res.Df Df.diff score Pr(>score)
## mod_CLECBBNF_MFJC.mvb 56
## mod_BBCLECJCMFNF.mvb 55 1 57.82 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
## Test statistics calculated assuming correlated response via ridge regularization
## P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# Pool the MF and EC habitat levels into a single level ("MFEC") to test
# whether these two habitats can be merged without loss of fit.
habitatN[["HabitatMFEC"]] <- fct_collapse(habitatN[["Habitat"]], MFEC = c("MF", "EC"))
fct_count(habitatN[["HabitatMFEC"]])
## # A tibble: 5 x 2
## f n
## <fct> <int>
## 1 BB 7
## 2 CL 15
## 3 MFEC 16
## 4 JC 10
## 5 NF 13
# Reduced model: MF and EC share one coefficient; all other habitats stay separate.
mod_CLBBJCNF_MFEC.mvb <- manyglm(
  communityB.mvb ~ HabitatMFEC,
  family = binomial("cloglog"),
  data = habitatN
)
# Residual diagnostics for the reduced model.
plot(mod_CLBBJCNF_MFEC.mvb)
# Score test of the reduced model against the full 6-level model.
# p = 0.001, df = 1, score= 55.29, nBoot = 999, 20 mins. EC and MF are sig diff
anova(
  mod_BBCLECJCMFNF.mvb, mod_CLBBJCNF_MFEC.mvb,
  nBoot = nboot,
  test = "score",
  cor.type = "shrink",
  show.time = "all"
)
## Resampling begins for test 1.
## Resampling run 0 finished. Time elapsed: 0.02 minutes...
## Resampling run 100 finished. Time elapsed: 1.53 minutes...
## Resampling run 200 finished. Time elapsed: 3.05 minutes...
## Resampling run 300 finished. Time elapsed: 4.56 minutes...
## Resampling run 400 finished. Time elapsed: 6.08 minutes...
## Resampling run 500 finished. Time elapsed: 7.59 minutes...
## Resampling run 600 finished. Time elapsed: 9.10 minutes...
## Resampling run 700 finished. Time elapsed: 10.61 minutes...
## Resampling run 800 finished. Time elapsed: 12.13 minutes...
## Resampling run 900 finished. Time elapsed: 13.64 minutes...
## Time elapsed: 0 hr 15 min 8 sec
## Analysis of Variance Table
##
## mod_CLBBJCNF_MFEC.mvb: communityB.mvb ~ HabitatMFEC
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
##
## Multivariate test:
## Res.Df Df.diff score Pr(>score)
## mod_CLBBJCNF_MFEC.mvb 56
## mod_BBCLECJCMFNF.mvb 55 1 55.93 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
## Test statistics calculated assuming correlated response via ridge regularization
## P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# Pool the MF and CL habitat levels into a single level ("MFCL") to test
# whether these two habitats can be merged without loss of fit.
habitatN[["HabitatMFCL"]] <- fct_collapse(habitatN[["Habitat"]], MFCL = c("MF", "CL"))
fct_count(habitatN[["HabitatMFCL"]])
## # A tibble: 5 x 2
## f n
## <fct> <int>
## 1 BB 7
## 2 MFCL 24
## 3 EC 7
## 4 JC 10
## 5 NF 13
# Reduced model: MF and CL share one coefficient; all other habitats stay separate.
mod_ECBBJCNF_MFCL.mvb <- manyglm(
  communityB.mvb ~ HabitatMFCL,
  family = binomial("cloglog"),
  data = habitatN
)
# Residual diagnostics for the reduced model.
plot(mod_ECBBJCNF_MFCL.mvb)
# Score test of the reduced model against the full 6-level model.
# p = 0.001, df = 1, score= 71.41, nBoot = 999, 20 mins. CL and MF are sig diff
anova(
  mod_BBCLECJCMFNF.mvb, mod_ECBBJCNF_MFCL.mvb,
  nBoot = nboot,
  test = "score",
  cor.type = "shrink",
  show.time = "all"
)
## Resampling begins for test 1.
## Resampling run 0 finished. Time elapsed: 0.02 minutes...
## Resampling run 100 finished. Time elapsed: 1.53 minutes...
## Resampling run 200 finished. Time elapsed: 3.03 minutes...
## Resampling run 300 finished. Time elapsed: 4.54 minutes...
## Resampling run 400 finished. Time elapsed: 6.06 minutes...
## Resampling run 500 finished. Time elapsed: 7.58 minutes...
## Resampling run 600 finished. Time elapsed: 9.09 minutes...
## Resampling run 700 finished. Time elapsed: 10.60 minutes...
## Resampling run 800 finished. Time elapsed: 12.12 minutes...
## Resampling run 900 finished. Time elapsed: 13.63 minutes...
## Time elapsed: 0 hr 15 min 7 sec
## Analysis of Variance Table
##
## mod_ECBBJCNF_MFCL.mvb: communityB.mvb ~ HabitatMFCL
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
##
## Multivariate test:
## Res.Df Df.diff score Pr(>score)
## mod_ECBBJCNF_MFCL.mvb 56
## mod_BBCLECJCMFNF.mvb 55 1 72.55 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
## Test statistics calculated assuming correlated response via ridge regularization
## P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# do a table-wide correction, under the assumption that there are 15 possible pairwise comparisons between all 6 forest types: (5*6)/2 = 15
# NOTE(review): the original comment said "30 possible pairwise comparisons" but
# its own formula (5*6)/2 gives 15; corrected to 15. Also, n = length(pvalues)
# corrects over only the 4 tests in this batch -- confirm whether n should be 15
# for a correction across all pairwise comparisons.
pvalues <- c(0.001, .001, .001, .001)
# Benjamini-Hochberg false-discovery-rate adjustment of the raw p-values.
pvalues.corr.fdr<-p.adjust(pvalues, method = "fdr", n = length(pvalues))
pvalues.corr.fdr
## [1] 0.001 0.001 0.001 0.001
# [1] 0.001 0.001 0.001 0.001
# pvalues.corr.fdr<-p.adjust(pvalues, method = "fdr", n = length(pvalues))